[llvm-commits] [Review Request][Patch]Introduce the VectorizeConfig class.

Hongbin Zheng etherzhhb at gmail.com
Thu Apr 5 07:33:02 PDT 2012


Hi Hal and Tobi,

This patch introduces the VectorizeConfig class, which allows us to
control the behavior of the BBVectorize pass. The VectorizeConfig class
provides a default constructor which reads the configuration from the
original command line options. So when we want to override some
parameter in the VectorizeConfig, we can write code like:

// Construct the configuration with the default ctor, which reads the
// configuration from the original command line options.
VectorizeConfig C;
// Disable vectorizing floating-point math intrinsics.
C.NoMath = true;

Any suggestions or comments?

best regards
ether

>From 4220f9833fbfed71e506ef94aee5f5f60ca01997 Mon Sep 17 00:00:00 2001
From: ether <etherzhhb at gmail.com>
Date: Thu, 5 Apr 2012 22:01:25 +0800
Subject: [PATCH 1/2] Introduce the VectorizeConfig class, with which we can
 control the behavior   of the BBVectorizePass without
 using command line option. As pointed out   by Hal, we
 can ask the TargetLoweringInfo for the architecture
 specific   VectorizeConfig to perform vectorizing with
 architecture specific   information.

---
 include/llvm/Transforms/Vectorize.h      |   67 +++++++++++++++++++++-
 lib/Transforms/Vectorize/BBVectorize.cpp |   90 +++++++++++++++++++-----------
 2 files changed, 123 insertions(+), 34 deletions(-)

diff --git a/include/llvm/Transforms/Vectorize.h
b/include/llvm/Transforms/Vectorize.h
index ad06937..ceeefcd 100644
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -20,10 +20,72 @@ class BasicBlock;
 class BasicBlockPass;

 //===----------------------------------------------------------------------===//
+/// @brief Vectorize configuration.
+struct VectorizeConfig {
+  //===--------------------------------------------------------------------===//
+  // Target architecture related parameters
+
+  /// @brief The size of the native vector registers.
+  unsigned VectorBits;
+
+  /// @brief Don't try to vectorize integer values.
+  bool NoInts;
+
+  /// @brief Don't try to vectorize floating-point values.
+  bool NoFloats;
+
+  /// @brief Don't try to vectorize casting (conversion) operations.
+  bool NoCasts;
+
+  /// @brief Don't try to vectorize floating-point math intrinsics.
+  bool NoMath;
+
+  /// @brief Don't try to vectorize the fused-multiply-add intrinsic.
+  bool NoFMA;
+
+  /// @brief Don't try to vectorize loads and stores.
+  bool NoMemOps;
+
+  /// @brief Only generate aligned loads and stores.
+  bool AlignedOnly;
+
+  //===--------------------------------------------------------------------===//
+  // Misc parameters
+
+  /// @brief The required chain depth for vectorization.
+  unsigned ReqChainDepth;
+
+  /// @brief The maximum search distance for instruction pairs.
+  unsigned SearchLimit;
+
+  /// @brief The maximum number of candidate pairs with which to use a full
+  ///        cycle check.
+  unsigned MaxCandPairsForCycleCheck;
+
+  /// @brief Replicating one element to a pair breaks the chain.
+  bool SplatBreaksChain;
+
+  /// @brief The maximum number of pairable instructions per group.
+  unsigned MaxInsts;
+
+  /// @brief The maximum number of pairing iterations.
+  unsigned MaxIter;
+
+  /// @brief Don't boost the chain-depth contribution of loads and stores.
+  bool NoMemOpBoost;
+
+  /// @brief Use a fast instruction dependency analysis.
+  bool FastDep;
+
+  /// @brief Initialize the VectorizeConfig from command line options.
+  VectorizeConfig();
+};
+
+//===----------------------------------------------------------------------===//
 //
 // BBVectorize - A basic-block vectorization pass.
 //
-BasicBlockPass *createBBVectorizePass();
+BasicBlockPass *createBBVectorizePass(VectorizeConfig C = VectorizeConfig());

 //===----------------------------------------------------------------------===//
 /// @brief Vectorize the BasicBlock.
@@ -35,7 +97,8 @@ BasicBlockPass *createBBVectorizePass();
 ///
 /// @return True if the BB is changed, false otherwise.
 ///
-bool vectorizeBasicBlock(Pass *P, BasicBlock &BB);
+bool vectorizeBasicBlock(Pass *P, BasicBlock &BB,
+                         VectorizeConfig C = VectorizeConfig());

 } // End llvm namespace

diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp
b/lib/Transforms/Vectorize/BBVectorize.cpp
index 5abb242..7672d5c 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -140,11 +140,15 @@ STATISTIC(NumFusedOps, "Number of operations
fused by bb-vectorize");
 namespace {
   struct BBVectorize : public BasicBlockPass {
     static char ID; // Pass identification, replacement for typeid
-    BBVectorize() : BasicBlockPass(ID) {
+
+    VectorizeConfig Config;
+
+    BBVectorize(VectorizeConfig C = VectorizeConfig())
+      : BasicBlockPass(ID), Config(C) {
       initializeBBVectorizePass(*PassRegistry::getPassRegistry());
     }

-    BBVectorize(Pass *P) : BasicBlockPass(ID) {
+    BBVectorize(Pass *P, VectorizeConfig C) : BasicBlockPass(ID), Config(C) {
       AA = &P->getAnalysis<AliasAnalysis>();
       SE = &P->getAnalysis<ScalarEvolution>();
       TD = P->getAnalysisIfAvailable<TargetData>();
@@ -291,9 +295,10 @@ namespace {
       // Iterate a sufficient number of times to merge types of size 1 bit,
       // then 2 bits, then 4, etc. up to half of the target vector width of the
       // target vector register.
-      for (unsigned v = 2, n = 1; v <= VectorBits && (!MaxIter || n
<= MaxIter);
+      for (unsigned v = 2, n = 1;
+           v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter);
            v *= 2, ++n) {
-        DEBUG(dbgs() << "BBV: fusing loop #" << n <<
+        DEBUG(dbgs() << "BBV: fusing loop #" << n <<
               " for " << BB.getName() << " in " <<
               BB.getParent()->getName() << "...\n");
         if (vectorizePairs(BB))
@@ -343,7 +348,7 @@ namespace {
     // candidate chains where longer chains are considered to be better.
     // Note: when this function returns 0, the resulting instructions are
     // not actually fused.
-    static inline size_t getDepthFactor(Value *V) {
+    inline size_t getDepthFactor(Value *V) {
       // InsertElement and ExtractElement have a depth factor of zero. This is
       // for two reasons: First, they cannot be usefully fused. Second, because
       // the pass generates a lot of these, they can confuse the simple metric
@@ -357,8 +362,8 @@ namespace {

       // Give a load or store half of the required depth so that load/store
       // pairs will vectorize.
-      if (!NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
-        return ReqChainDepth/2;
+      if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
+        return Config.ReqChainDepth/2;

       return 1;
     }
@@ -431,9 +436,9 @@ namespace {
       case Intrinsic::exp:
       case Intrinsic::exp2:
       case Intrinsic::pow:
-        return !NoMath;
+        return !Config.NoMath;
       case Intrinsic::fma:
-        return !NoFMA;
+        return !Config.NoFMA;
       }
     }

@@ -527,16 +532,16 @@ namespace {
     } else if (LoadInst *L = dyn_cast<LoadInst>(I)) {
       // Vectorize simple loads if possbile:
       IsSimpleLoadStore = L->isSimple();
-      if (!IsSimpleLoadStore || NoMemOps)
+      if (!IsSimpleLoadStore || Config.NoMemOps)
         return false;
     } else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
       // Vectorize simple stores if possbile:
       IsSimpleLoadStore = S->isSimple();
-      if (!IsSimpleLoadStore || NoMemOps)
+      if (!IsSimpleLoadStore || Config.NoMemOps)
         return false;
     } else if (CastInst *C = dyn_cast<CastInst>(I)) {
       // We can vectorize casts, but not casts of pointer types, etc.
-      if (NoCasts)
+      if (Config.NoCasts)
         return false;

       Type *SrcTy = C->getSrcTy();
@@ -576,14 +581,14 @@ namespace {
         !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
       return false;

-    if (NoInts && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
+    if (Config.NoInts && (T1->isIntOrIntVectorTy() ||
T2->isIntOrIntVectorTy()))
       return false;

-    if (NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
+    if (Config.NoFloats && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
       return false;

-    if (T1->getPrimitiveSizeInBits() > VectorBits/2 ||
-        T2->getPrimitiveSizeInBits() > VectorBits/2)
+    if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 ||
+        T2->getPrimitiveSizeInBits() > Config.VectorBits/2)
       return false;

     return true;
@@ -611,7 +616,7 @@ namespace {
           LI->isVolatile() != LJ->isVolatile() ||
           LI->getOrdering() != LJ->getOrdering() ||
           LI->getSynchScope() != LJ->getSynchScope())
-        return false;
+        return false;
     } else if ((SI = dyn_cast<StoreInst>(I)) && (SJ =
dyn_cast<StoreInst>(J))) {
       if (SI->getValueOperand()->getType() !=
             SJ->getValueOperand()->getType() ||
@@ -632,7 +637,7 @@ namespace {
       int64_t OffsetInElmts = 0;
       if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
             OffsetInElmts) && abs64(OffsetInElmts) == 1) {
-        if (AlignedOnly) {
+        if (Config.AlignedOnly) {
           Type *aType = isa<StoreInst>(I) ?
             cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
           // An aligned load or store is possible only if the instruction
@@ -753,12 +758,12 @@ namespace {
       AliasSetTracker WriteSet(*AA);
       bool JAfterStart = IAfterStart;
       BasicBlock::iterator J = llvm::next(I);
-      for (unsigned ss = 0; J != E && ss <= SearchLimit; ++J, ++ss) {
+      for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
         if (J == Start) JAfterStart = true;

         // Determine if J uses I, if so, exit the loop.
-        bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !FastDep);
-        if (FastDep) {
+        bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
+        if (Config.FastDep) {
           // Note: For this heuristic to be effective, independent operations
           // must tend to be intermixed. This is likely to be true from some
           // kinds of grouped loop unrolling (but not the generic LLVM pass),
@@ -796,7 +801,7 @@ namespace {
         // If we have already found too many pairs, break here and
this function
         // will be called again starting after the last instruction selected
         // during this invocation.
-        if (PairableInsts.size() >= MaxInsts) {
+        if (PairableInsts.size() >= Config.MaxInsts) {
           ShouldContinue = true;
           break;
         }
@@ -841,7 +846,7 @@ namespace {
           ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I)));
       }

-      if (SplatBreaksChain) continue;
+      if (Config.SplatBreaksChain) continue;
       // Look for cases where just the first value in the pair is used by
       // both members of another pair (splatting).
       for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
@@ -850,7 +855,7 @@ namespace {
       }
     }

-    if (SplatBreaksChain) return;
+    if (Config.SplatBreaksChain) return;
     // Look for cases where just the second value in the pair is used by
     // both members of another pair (splatting).
     for (Value::use_iterator I = P.second->use_begin(),
@@ -1280,7 +1285,7 @@ namespace {
              << *J->first << " <-> " << *J->second << "} of depth " <<
              MaxDepth << " and size " << PrunedTree.size() <<
             " (effective size: " << EffSize << ")\n");
-      if (MaxDepth >= ReqChainDepth && EffSize > BestEffSize) {
+      if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) {
         BestMaxDepth = MaxDepth;
         BestEffSize = EffSize;
         BestTree = PrunedTree;
@@ -1296,7 +1301,8 @@ namespace {
                       std::multimap<ValuePair, ValuePair> &ConnectedPairs,
                       DenseSet<ValuePair> &PairableInstUsers,
                       DenseMap<Value *, Value *>& ChosenPairs) {
-    bool UseCycleCheck = CandidatePairs.size() <= MaxCandPairsForCycleCheck;
+    bool UseCycleCheck =
+     CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck;
     std::multimap<ValuePair, ValuePair> PairableInstUserMap;
     for (std::vector<Value *>::iterator I = PairableInsts.begin(),
          E = PairableInsts.end(); I != E; ++I) {
@@ -1547,11 +1553,11 @@ namespace {
         unsigned IID = F->getIntrinsicID();
         if (o == NumOperands-1) {
           BasicBlock &BB = *I->getParent();
-
+
           Module *M = BB.getParent()->getParent();
           Type *ArgType = I->getType();
           Type *VArgType = getVecTypeForPair(ArgType);
-
+
           // FIXME: is it safe to do this here?
           ReplacedOperands[o] = Intrinsic::getDeclaration(M,
             (Intrinsic::ID) IID, VArgType);
@@ -1867,11 +1873,31 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
 INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)

-BasicBlockPass *llvm::createBBVectorizePass() {
-  return new BBVectorize();
+BasicBlockPass *llvm::createBBVectorizePass(VectorizeConfig C) {
+  return new BBVectorize(C);
 }

-bool llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB) {
-  BBVectorize BBVectorizer(P);
+bool llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, VectorizeConfig C) {
+  BBVectorize BBVectorizer(P, C);
   return BBVectorizer.vectorizeBB(BB);
 }
+
+//===----------------------------------------------------------------------===//
+VectorizeConfig::VectorizeConfig() {
+  VectorBits = ::VectorBits;
+  NoInts = ::NoInts;
+  NoFloats = ::NoFloats;
+  NoCasts = ::NoCasts;
+  NoMath = ::NoMath;
+  NoFMA = ::NoFMA;
+  NoMemOps = ::NoMemOps;
+  AlignedOnly = ::AlignedOnly;
+  ReqChainDepth = ::ReqChainDepth;
+  SearchLimit = ::SearchLimit;
+  MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
+  SplatBreaksChain = ::SplatBreaksChain;
+  MaxInsts = ::MaxInsts;
+  MaxIter = ::MaxIter;
+  NoMemOpBoost = ::NoMemOpBoost;
+  FastDep = ::FastDep;
+}
-- 
1.7.5.4
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-Introduce-the-VectorizeConfig-class-with-which-we-ca.patch
Type: text/x-diff
Size: 13545 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20120405/6aa4c58c/attachment.patch>


More information about the llvm-commits mailing list