[llvm-commits] [Review Request][Patch]Introduce the VectorizeConfig class.
Hongbin Zheng
etherzhhb at gmail.com
Thu Apr 5 08:24:35 PDT 2012
On Thu, Apr 5, 2012 at 11:23 PM, Hongbin Zheng <etherzhhb at gmail.com> wrote:
> Hi,
>
> The up-to-date patch is attached; please review. :)
>
> best regards
> ether
>
> ---
> include/llvm/Transforms/Vectorize.h | 68 ++++++++++++++++++++++-
> lib/Transforms/Vectorize/BBVectorize.cpp | 90 +++++++++++++++++++-----------
> 2 files changed, 124 insertions(+), 34 deletions(-)
>
> diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
> index ad06937..80ccf0c 100644
> --- a/include/llvm/Transforms/Vectorize.h
> +++ b/include/llvm/Transforms/Vectorize.h
> @@ -20,10 +20,73 @@ class BasicBlock;
> class BasicBlockPass;
>
> //===----------------------------------------------------------------------===//
> +/// @brief Vectorize configuration.
> +struct VectorizeConfig {
> + //===--------------------------------------------------------------------===//
> + // Target architecture related parameters
> +
> + /// @brief The size of the native vector registers.
> + unsigned VectorBits;
> +
> + /// @brief Don't try to vectorize integer values.
> + bool NoInts;
> +
> + /// @brief Don't try to vectorize floating-point values.
> + bool NoFloats;
> +
> + /// @brief Don't try to vectorize casting (conversion) operations.
> + bool NoCasts;
> +
> + /// @brief Don't try to vectorize floating-point math intrinsics.
> + bool NoMath;
> +
> + /// @brief Don't try to vectorize the fused-multiply-add intrinsic.
> + bool NoFMA;
> +
> + /// @brief Don't try to vectorize loads and stores.
> + bool NoMemOps;
> +
> + /// @brief Only generate aligned loads and stores.
> + bool AlignedOnly;
> +
> + //===--------------------------------------------------------------------===//
> + // Misc parameters
> +
> + /// @brief The required chain depth for vectorization.
> + unsigned ReqChainDepth;
> +
> + /// @brief The maximum search distance for instruction pairs.
> + unsigned SearchLimit;
> +
> + /// @brief The maximum number of candidate pairs with which to use a full
> + /// cycle check.
> + unsigned MaxCandPairsForCycleCheck;
> +
> + /// @brief Replicating one element to a pair breaks the chain.
> + bool SplatBreaksChain;
> +
> + /// @brief The maximum number of pairable instructions per group.
> + unsigned MaxInsts;
> +
> + /// @brief The maximum number of pairing iterations.
> + unsigned MaxIter;
> +
> + /// @brief Don't boost the chain-depth contribution of loads and stores.
> + bool NoMemOpBoost;
> +
> + /// @brief Use a fast instruction dependency analysis.
> + bool FastDep;
> +
> + /// @brief Initialize the VectorizeConfig from command line options.
> + VectorizeConfig();
> +};
> +
> +//===----------------------------------------------------------------------===//
> //
> // BBVectorize - A basic-block vectorization pass.
> //
> -BasicBlockPass *createBBVectorizePass();
> +BasicBlockPass *
> +createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
>
> //===----------------------------------------------------------------------===//
> /// @brief Vectorize the BasicBlock.
> @@ -35,7 +98,8 @@ BasicBlockPass *createBBVectorizePass();
> ///
> /// @return True if the BB is changed, false otherwise.
> ///
> -bool vectorizeBasicBlock(Pass *P, BasicBlock &BB);
> +bool vectorizeBasicBlock(Pass *P, BasicBlock &BB,
> + VectorizeConfig C = VectorizeConfig());
Sorry, this parameter should be "const" too (matching createBBVectorizePass above).
>
> } // End llvm namespace
>
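To make the intended usage concrete: with this interface, a frontend that
wants non-default settings could do something like the following. This is a
rough sketch, not part of the patch; the field values are made up purely for
illustration.

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Vectorize.h"
    using namespace llvm;

    void addBBVectorize(PassManager &PM) {
      VectorizeConfig C;   // defaults are initialized from the cl::opts
      C.VectorBits = 256;  // hypothetical target with 256-bit vector registers
      C.NoMath = true;     // hypothetical: skip FP math intrinsics
      PM.add(createBBVectorizePass(C));
    }
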
> diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
> index 5abb242..a9d09fd 100644
> --- a/lib/Transforms/Vectorize/BBVectorize.cpp
> +++ b/lib/Transforms/Vectorize/BBVectorize.cpp
> @@ -140,11 +140,15 @@ STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize");
> namespace {
> struct BBVectorize : public BasicBlockPass {
> static char ID; // Pass identification, replacement for typeid
> - BBVectorize() : BasicBlockPass(ID) {
> +
> + VectorizeConfig Config;
> +
> + BBVectorize(VectorizeConfig C = VectorizeConfig())
> + : BasicBlockPass(ID), Config(C) {
> initializeBBVectorizePass(*PassRegistry::getPassRegistry());
> }
>
> - BBVectorize(Pass *P) : BasicBlockPass(ID) {
> + BBVectorize(Pass *P, VectorizeConfig C) : BasicBlockPass(ID), Config(C) {
> AA = &P->getAnalysis<AliasAnalysis>();
> SE = &P->getAnalysis<ScalarEvolution>();
> TD = P->getAnalysisIfAvailable<TargetData>();
> @@ -291,9 +295,10 @@ namespace {
> // Iterate a sufficient number of times to merge types of size 1 bit,
> // then 2 bits, then 4, etc. up to half of the target vector width of the
> // target vector register.
> - for (unsigned v = 2, n = 1; v <= VectorBits && (!MaxIter || n <= MaxIter);
> + for (unsigned v = 2, n = 1;
> + v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter);
> v *= 2, ++n) {
> - DEBUG(dbgs() << "BBV: fusing loop #" << n <<
> + DEBUG(dbgs() << "BBV: fusing loop #" << n <<
> " for " << BB.getName() << " in " <<
> BB.getParent()->getName() << "...\n");
> if (vectorizePairs(BB))
> @@ -343,7 +348,7 @@ namespace {
> // candidate chains where longer chains are considered to be better.
> // Note: when this function returns 0, the resulting instructions are
> // not actually fused.
> - static inline size_t getDepthFactor(Value *V) {
> + inline size_t getDepthFactor(Value *V) {
> // InsertElement and ExtractElement have a depth factor of zero. This is
> // for two reasons: First, they cannot be usefully fused. Second, because
> // the pass generates a lot of these, they can confuse the simple metric
> @@ -357,8 +362,8 @@ namespace {
>
> // Give a load or store half of the required depth so that load/store
> // pairs will vectorize.
> - if (!NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
> - return ReqChainDepth/2;
> + if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
> + return Config.ReqChainDepth/2;
>
> return 1;
> }
> @@ -431,9 +436,9 @@ namespace {
> case Intrinsic::exp:
> case Intrinsic::exp2:
> case Intrinsic::pow:
> - return !NoMath;
> + return !Config.NoMath;
> case Intrinsic::fma:
> - return !NoFMA;
> + return !Config.NoFMA;
> }
> }
>
> @@ -527,16 +532,16 @@ namespace {
> } else if (LoadInst *L = dyn_cast<LoadInst>(I)) {
> // Vectorize simple loads if possible:
> IsSimpleLoadStore = L->isSimple();
> - if (!IsSimpleLoadStore || NoMemOps)
> + if (!IsSimpleLoadStore || Config.NoMemOps)
> return false;
> } else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
> // Vectorize simple stores if possible:
> IsSimpleLoadStore = S->isSimple();
> - if (!IsSimpleLoadStore || NoMemOps)
> + if (!IsSimpleLoadStore || Config.NoMemOps)
> return false;
> } else if (CastInst *C = dyn_cast<CastInst>(I)) {
> // We can vectorize casts, but not casts of pointer types, etc.
> - if (NoCasts)
> + if (Config.NoCasts)
> return false;
>
> Type *SrcTy = C->getSrcTy();
> @@ -576,14 +581,14 @@ namespace {
> !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
> return false;
>
> - if (NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
> + if (Config.NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
> return false;
>
> - if (NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
> + if (Config.NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
> return false;
>
> - if (T1->getPrimitiveSizeInBits() > VectorBits/2 ||
> - T2->getPrimitiveSizeInBits() > VectorBits/2)
> + if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 ||
> + T2->getPrimitiveSizeInBits() > Config.VectorBits/2)
> return false;
>
> return true;
> @@ -611,7 +616,7 @@ namespace {
> LI->isVolatile() != LJ->isVolatile() ||
> LI->getOrdering() != LJ->getOrdering() ||
> LI->getSynchScope() != LJ->getSynchScope())
> - return false;
> + return false;
> } else if ((SI = dyn_cast<StoreInst>(I)) && (SJ = dyn_cast<StoreInst>(J))) {
> if (SI->getValueOperand()->getType() !=
> SJ->getValueOperand()->getType() ||
> @@ -632,7 +637,7 @@ namespace {
> int64_t OffsetInElmts = 0;
> if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
> OffsetInElmts) && abs64(OffsetInElmts) == 1) {
> - if (AlignedOnly) {
> + if (Config.AlignedOnly) {
> Type *aType = isa<StoreInst>(I) ?
> cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
> // An aligned load or store is possible only if the instruction
> @@ -753,12 +758,12 @@ namespace {
> AliasSetTracker WriteSet(*AA);
> bool JAfterStart = IAfterStart;
> BasicBlock::iterator J = llvm::next(I);
> - for (unsigned ss = 0; J != E && ss <= SearchLimit; ++J, ++ss) {
> + for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
> if (J == Start) JAfterStart = true;
>
> // Determine if J uses I, if so, exit the loop.
> - bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !FastDep);
> - if (FastDep) {
> + bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
> + if (Config.FastDep) {
> // Note: For this heuristic to be effective, independent operations
> // must tend to be intermixed. This is likely to be true from some
> // kinds of grouped loop unrolling (but not the generic LLVM pass),
> @@ -796,7 +801,7 @@ namespace {
> // If we have already found too many pairs, break here and this function
> // will be called again starting after the last instruction selected
> // during this invocation.
> - if (PairableInsts.size() >= MaxInsts) {
> + if (PairableInsts.size() >= Config.MaxInsts) {
> ShouldContinue = true;
> break;
> }
> @@ -841,7 +846,7 @@ namespace {
> ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I)));
> }
>
> - if (SplatBreaksChain) continue;
> + if (Config.SplatBreaksChain) continue;
> // Look for cases where just the first value in the pair is used by
> // both members of another pair (splatting).
> for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
> @@ -850,7 +855,7 @@ namespace {
> }
> }
>
> - if (SplatBreaksChain) return;
> + if (Config.SplatBreaksChain) return;
> // Look for cases where just the second value in the pair is used by
> // both members of another pair (splatting).
> for (Value::use_iterator I = P.second->use_begin(),
> @@ -1280,7 +1285,7 @@ namespace {
> << *J->first << " <-> " << *J->second << "} of depth " <<
> MaxDepth << " and size " << PrunedTree.size() <<
> " (effective size: " << EffSize << ")\n");
> - if (MaxDepth >= ReqChainDepth && EffSize > BestEffSize) {
> + if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) {
> BestMaxDepth = MaxDepth;
> BestEffSize = EffSize;
> BestTree = PrunedTree;
> @@ -1296,7 +1301,8 @@ namespace {
> std::multimap<ValuePair, ValuePair> &ConnectedPairs,
> DenseSet<ValuePair> &PairableInstUsers,
> DenseMap<Value *, Value *>& ChosenPairs) {
> - bool UseCycleCheck = CandidatePairs.size() <= MaxCandPairsForCycleCheck;
> + bool UseCycleCheck =
> + CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck;
> std::multimap<ValuePair, ValuePair> PairableInstUserMap;
> for (std::vector<Value *>::iterator I = PairableInsts.begin(),
> E = PairableInsts.end(); I != E; ++I) {
> @@ -1547,11 +1553,11 @@ namespace {
> unsigned IID = F->getIntrinsicID();
> if (o == NumOperands-1) {
> BasicBlock &BB = *I->getParent();
> -
> +
> Module *M = BB.getParent()->getParent();
> Type *ArgType = I->getType();
> Type *VArgType = getVecTypeForPair(ArgType);
> -
> +
> // FIXME: is it safe to do this here?
> ReplacedOperands[o] = Intrinsic::getDeclaration(M,
> (Intrinsic::ID) IID, VArgType);
> @@ -1867,11 +1873,31 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
> INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
> INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
>
> -BasicBlockPass *llvm::createBBVectorizePass() {
> - return new BBVectorize();
> +BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
> + return new BBVectorize(C);
> }
>
> -bool llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB) {
> - BBVectorize BBVectorizer(P);
> +bool llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, VectorizeConfig C) {
> + BBVectorize BBVectorizer(P, C);
> return BBVectorizer.vectorizeBB(BB);
> }
> +
> +//===----------------------------------------------------------------------===//
> +VectorizeConfig::VectorizeConfig() {
> + VectorBits = ::VectorBits;
> + NoInts = ::NoInts;
> + NoFloats = ::NoFloats;
> + NoCasts = ::NoCasts;
> + NoMath = ::NoMath;
> + NoFMA = ::NoFMA;
> + NoMemOps = ::NoMemOps;
> + AlignedOnly = ::AlignedOnly;
> + ReqChainDepth = ::ReqChainDepth;
> + SearchLimit = ::SearchLimit;
> + MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
> + SplatBreaksChain = ::SplatBreaksChain;
> + MaxInsts = ::MaxInsts;
> + MaxIter = ::MaxIter;
> + NoMemOpBoost = ::NoMemOpBoost;
> + FastDep = ::FastDep;
> +}
> --
> 1.7.5.4
>
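For the in-place entry point, a driver pass that requires AliasAnalysis and
ScalarEvolution (which the BBVectorize(Pass *P, ...) constructor pulls from
the caller) could use the new overload roughly like this. Again an untested
sketch; MyDriverPass is a placeholder name, not something in the patch.

    // Inside a hypothetical BasicBlockPass whose getAnalysisUsage() requires
    // AliasAnalysis and ScalarEvolution:
    bool MyDriverPass::runOnBasicBlock(BasicBlock &BB) {
      VectorizeConfig C;     // start from the command-line defaults
      C.AlignedOnly = true;  // e.g. a target without unaligned vector memops
      return vectorizeBasicBlock(this, BB, C);
    }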