[llvm-commits] [llvm] r154096 - in /llvm/trunk: include/llvm/Transforms/Vectorize.h lib/Transforms/Vectorize/BBVectorize.cpp

Hongbin Zheng etherzhhb at gmail.com
Thu Apr 5 08:46:55 PDT 2012


Author: ether
Date: Thu Apr  5 10:46:55 2012
New Revision: 154096

URL: http://llvm.org/viewvc/llvm-project?rev=154096&view=rev
Log:
Introduce the VectorizeConfig class, with which we can control the behavior
  of the BBVectorize pass without using command line options. As pointed out
  by Hal, we can ask the TargetLoweringInfo for an architecture-specific
  VectorizeConfig to perform vectorization with architecture-specific
  information.
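
For illustration, a front end or tool can now tune the pass in code instead
of on the command line. A minimal sketch (the 256-bit width and the chosen
knobs are hypothetical, not part of this commit):

  #include "llvm/Transforms/Vectorize.h"

  using namespace llvm;

  // Build a BBVectorize pass for a hypothetical target with 256-bit
  // vector registers that should not vectorize FP math intrinsics.
  static BasicBlockPass *createTunedBBVectorizePass() {
    VectorizeConfig C;    // defaults come from the command line options
    C.VectorBits = 256;   // size of the native vector registers
    C.NoMath = true;      // skip sin/cos/exp/pow etc.
    C.AlignedOnly = true; // only generate aligned loads and stores
    return createBBVectorizePass(C);
  }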

Modified:
    llvm/trunk/include/llvm/Transforms/Vectorize.h
    llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp

Modified: llvm/trunk/include/llvm/Transforms/Vectorize.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Vectorize.h?rev=154096&r1=154095&r2=154096&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Vectorize.h (original)
+++ llvm/trunk/include/llvm/Transforms/Vectorize.h Thu Apr  5 10:46:55 2012
@@ -20,10 +20,73 @@
 class BasicBlockPass;
 
 //===----------------------------------------------------------------------===//
+/// @brief Vectorize configuration.
+struct VectorizeConfig {
+  //===--------------------------------------------------------------------===//
+  // Target architecture related parameters
+
+  /// @brief The size of the native vector registers.
+  unsigned VectorBits;
+
+  /// @brief Don't try to vectorize integer values.
+  bool NoInts;
+
+  /// @brief Don't try to vectorize floating-point values.
+  bool NoFloats;
+
+  /// @brief Don't try to vectorize casting (conversion) operations.
+  bool NoCasts;
+
+  /// @brief Don't try to vectorize floating-point math intrinsics.
+  bool NoMath;
+
+  /// @brief Don't try to vectorize the fused-multiply-add intrinsic.
+  bool NoFMA;
+
+  /// @brief Don't try to vectorize loads and stores.
+  bool NoMemOps;
+
+  /// @brief Only generate aligned loads and stores.
+  bool AlignedOnly;
+
+  //===--------------------------------------------------------------------===//
+  // Misc parameters
+
+  /// @brief The required chain depth for vectorization.
+  unsigned ReqChainDepth;
+
+  /// @brief The maximum search distance for instruction pairs.
+  unsigned SearchLimit;
+
+  /// @brief The maximum number of candidate pairs with which to use a full
+  ///        cycle check.
+  unsigned MaxCandPairsForCycleCheck;
+
+  /// @brief Replicating one element to a pair breaks the chain.
+  bool SplatBreaksChain;
+
+  /// @brief The maximum number of pairable instructions per group.
+  unsigned MaxInsts;
+
+  /// @brief The maximum number of pairing iterations.
+  unsigned MaxIter;
+
+  /// @brief Don't boost the chain-depth contribution of loads and stores.
+  bool NoMemOpBoost;
+
+  /// @brief Use a fast instruction dependency analysis.
+  bool FastDep;
+
+  /// @brief Initialize the VectorizeConfig from command line options.
+  VectorizeConfig();
+};
+
+//===----------------------------------------------------------------------===//
 //
 // BBVectorize - A basic-block vectorization pass.
 //
-BasicBlockPass *createBBVectorizePass();
+BasicBlockPass *
+createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
 
 //===----------------------------------------------------------------------===//
 /// @brief Vectorize the BasicBlock.
@@ -35,7 +98,8 @@
 ///
 /// @return True if the BB is changed, false otherwise.
 ///
-bool vectorizeBasicBlock(Pass *P, BasicBlock &BB);
+bool vectorizeBasicBlock(Pass *P, BasicBlock &BB,
+                         const VectorizeConfig &C = VectorizeConfig());
 
 } // End llvm namespace
 

Modified: llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp?rev=154096&r1=154095&r2=154096&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/BBVectorize.cpp Thu Apr  5 10:46:55 2012
@@ -140,11 +140,16 @@
 namespace {
   struct BBVectorize : public BasicBlockPass {
     static char ID; // Pass identification, replacement for typeid
-    BBVectorize() : BasicBlockPass(ID) {
+
+    VectorizeConfig Config;
+
+    BBVectorize(const VectorizeConfig &C = VectorizeConfig())
+      : BasicBlockPass(ID), Config(C) {
       initializeBBVectorizePass(*PassRegistry::getPassRegistry());
     }
 
-    BBVectorize(Pass *P) : BasicBlockPass(ID) {
+    BBVectorize(Pass *P, const VectorizeConfig &C)
+      : BasicBlockPass(ID), Config(C) {
       AA = &P->getAnalysis<AliasAnalysis>();
       SE = &P->getAnalysis<ScalarEvolution>();
       TD = P->getAnalysisIfAvailable<TargetData>();
@@ -291,9 +296,10 @@
       // Iterate a sufficient number of times to merge types of size 1 bit,
       // then 2 bits, then 4, etc. up to half of the width of the target
       // vector register.
-      for (unsigned v = 2, n = 1; v <= VectorBits && (!MaxIter || n <= MaxIter);
+      for (unsigned v = 2, n = 1;
+           v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter);
            v *= 2, ++n) {
-        DEBUG(dbgs() << "BBV: fusing loop #" << n << 
+        DEBUG(dbgs() << "BBV: fusing loop #" << n <<
               " for " << BB.getName() << " in " <<
               BB.getParent()->getName() << "...\n");
         if (vectorizePairs(BB))
@@ -343,7 +349,7 @@
     // candidate chains where longer chains are considered to be better.
     // Note: when this function returns 0, the resulting instructions are
     // not actually fused.
-    static inline size_t getDepthFactor(Value *V) {
+    inline size_t getDepthFactor(Value *V) {
       // InsertElement and ExtractElement have a depth factor of zero. This is
       // for two reasons: First, they cannot be usefully fused. Second, because
       // the pass generates a lot of these, they can confuse the simple metric
@@ -357,8 +363,8 @@
 
       // Give a load or store half of the required depth so that load/store
       // pairs will vectorize.
-      if (!NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
-        return ReqChainDepth/2;
+      if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
+        return Config.ReqChainDepth/2;
 
       return 1;
     }
@@ -431,9 +437,9 @@
       case Intrinsic::exp:
       case Intrinsic::exp2:
       case Intrinsic::pow:
-        return !NoMath;
+        return !Config.NoMath;
       case Intrinsic::fma:
-        return !NoFMA;
+        return !Config.NoFMA;
       }
     }
 
@@ -527,16 +533,16 @@
     } else if (LoadInst *L = dyn_cast<LoadInst>(I)) {
      // Vectorize simple loads if possible:
       IsSimpleLoadStore = L->isSimple();
-      if (!IsSimpleLoadStore || NoMemOps)
+      if (!IsSimpleLoadStore || Config.NoMemOps)
         return false;
     } else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
      // Vectorize simple stores if possible:
       IsSimpleLoadStore = S->isSimple();
-      if (!IsSimpleLoadStore || NoMemOps)
+      if (!IsSimpleLoadStore || Config.NoMemOps)
         return false;
     } else if (CastInst *C = dyn_cast<CastInst>(I)) {
       // We can vectorize casts, but not casts of pointer types, etc.
-      if (NoCasts)
+      if (Config.NoCasts)
         return false;
 
       Type *SrcTy = C->getSrcTy();
@@ -576,14 +582,14 @@
         !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
       return false;
 
-    if (NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
+    if (Config.NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
       return false;
 
-    if (NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
+    if (Config.NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
       return false;
 
-    if (T1->getPrimitiveSizeInBits() > VectorBits/2 ||
-        T2->getPrimitiveSizeInBits() > VectorBits/2)
+    if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 ||
+        T2->getPrimitiveSizeInBits() > Config.VectorBits/2)
       return false;
 
     return true;
@@ -611,7 +617,7 @@
           LI->isVolatile() != LJ->isVolatile() ||
           LI->getOrdering() != LJ->getOrdering() ||
           LI->getSynchScope() != LJ->getSynchScope())
-        return false; 
+        return false;
     } else if ((SI = dyn_cast<StoreInst>(I)) && (SJ = dyn_cast<StoreInst>(J))) {
       if (SI->getValueOperand()->getType() !=
             SJ->getValueOperand()->getType() ||
@@ -632,7 +638,7 @@
       int64_t OffsetInElmts = 0;
       if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
             OffsetInElmts) && abs64(OffsetInElmts) == 1) {
-        if (AlignedOnly) {
+        if (Config.AlignedOnly) {
           Type *aType = isa<StoreInst>(I) ?
             cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
           // An aligned load or store is possible only if the instruction
@@ -753,12 +759,12 @@
       AliasSetTracker WriteSet(*AA);
       bool JAfterStart = IAfterStart;
       BasicBlock::iterator J = llvm::next(I);
-      for (unsigned ss = 0; J != E && ss <= SearchLimit; ++J, ++ss) {
+      for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
         if (J == Start) JAfterStart = true;
 
         // Determine if J uses I, if so, exit the loop.
-        bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !FastDep);
-        if (FastDep) {
+        bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
+        if (Config.FastDep) {
           // Note: For this heuristic to be effective, independent operations
           // must tend to be intermixed. This is likely to be true from some
           // kinds of grouped loop unrolling (but not the generic LLVM pass),
@@ -796,7 +802,7 @@
         // If we have already found too many pairs, break here and this function
         // will be called again starting after the last instruction selected
         // during this invocation.
-        if (PairableInsts.size() >= MaxInsts) {
+        if (PairableInsts.size() >= Config.MaxInsts) {
           ShouldContinue = true;
           break;
         }
@@ -841,7 +847,7 @@
           ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I)));
       }
 
-      if (SplatBreaksChain) continue;
+      if (Config.SplatBreaksChain) continue;
       // Look for cases where just the first value in the pair is used by
       // both members of another pair (splatting).
       for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
@@ -850,7 +856,7 @@
       }
     }
 
-    if (SplatBreaksChain) return;
+    if (Config.SplatBreaksChain) return;
     // Look for cases where just the second value in the pair is used by
     // both members of another pair (splatting).
     for (Value::use_iterator I = P.second->use_begin(),
@@ -1280,7 +1286,7 @@
              << *J->first << " <-> " << *J->second << "} of depth " <<
              MaxDepth << " and size " << PrunedTree.size() <<
             " (effective size: " << EffSize << ")\n");
-      if (MaxDepth >= ReqChainDepth && EffSize > BestEffSize) {
+      if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) {
         BestMaxDepth = MaxDepth;
         BestEffSize = EffSize;
         BestTree = PrunedTree;
@@ -1296,7 +1302,8 @@
                       std::multimap<ValuePair, ValuePair> &ConnectedPairs,
                       DenseSet<ValuePair> &PairableInstUsers,
                       DenseMap<Value *, Value *>& ChosenPairs) {
-    bool UseCycleCheck = CandidatePairs.size() <= MaxCandPairsForCycleCheck;
+    bool UseCycleCheck =
+     CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck;
     std::multimap<ValuePair, ValuePair> PairableInstUserMap;
     for (std::vector<Value *>::iterator I = PairableInsts.begin(),
          E = PairableInsts.end(); I != E; ++I) {
@@ -1547,11 +1554,11 @@
         unsigned IID = F->getIntrinsicID();
         if (o == NumOperands-1) {
           BasicBlock &BB = *I->getParent();
-  
+
           Module *M = BB.getParent()->getParent();
           Type *ArgType = I->getType();
           Type *VArgType = getVecTypeForPair(ArgType);
-  
+
           // FIXME: is it safe to do this here?
           ReplacedOperands[o] = Intrinsic::getDeclaration(M,
             (Intrinsic::ID) IID, VArgType);
@@ -1867,11 +1874,32 @@
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
 
-BasicBlockPass *llvm::createBBVectorizePass() {
-  return new BBVectorize();
+BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
+  return new BBVectorize(C);
 }
 
-bool llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB) {
-  BBVectorize BBVectorizer(P);
+bool
+llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) {
+  BBVectorize BBVectorizer(P, C);
   return BBVectorizer.vectorizeBB(BB);
 }
+
+//===----------------------------------------------------------------------===//
+VectorizeConfig::VectorizeConfig() {
+  VectorBits = ::VectorBits;
+  NoInts = ::NoInts;
+  NoFloats = ::NoFloats;
+  NoCasts = ::NoCasts;
+  NoMath = ::NoMath;
+  NoFMA = ::NoFMA;
+  NoMemOps = ::NoMemOps;
+  AlignedOnly = ::AlignedOnly;
+  ReqChainDepth = ::ReqChainDepth;
+  SearchLimit = ::SearchLimit;
+  MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
+  SplatBreaksChain = ::SplatBreaksChain;
+  MaxInsts = ::MaxInsts;
+  MaxIter = ::MaxIter;
+  NoMemOpBoost = ::NoMemOpBoost;
+  FastDep = ::FastDep;
+}
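
The ::-qualified names in the constructor refer to the file-scope cl::opt
globals already defined at the top of BBVectorize.cpp, so a
default-constructed VectorizeConfig still reflects whatever was passed on
the command line. The pattern, sketched for a single knob (treat the exact
flag spelling and default as illustrative):

  #include "llvm/Support/CommandLine.h"

  // File-scope option in BBVectorize.cpp; the cl::opt converts
  // implicitly to unsigned, which VectorizeConfig() copies.
  static llvm::cl::opt<unsigned>
  VectorBits("bb-vectorize-vector-bits", llvm::cl::init(128),
    llvm::cl::Hidden,
    llvm::cl::desc("The size of the native vector registers"));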

More information about the llvm-commits mailing list