[llvm-commits] [llvm] r166620 - in /llvm/trunk: lib/Transforms/Vectorize/ test/Transforms/LoopVectorize/

Wed Oct 24 15:21:01 PDT 2012

----- Original Message -----
> From: "Nadav Rotem" <nrotem at apple.com>
> To: llvm-commits at cs.uiuc.edu
> Sent: Wednesday, October 24, 2012 3:36:33 PM
> Subject: [llvm-commits] [llvm] r166620 - in /llvm/trunk: lib/Transforms/Vectorize/ test/Transforms/LoopVectorize/
> 
> Author: nadav
> Date: Wed Oct 24 15:36:32 2012
> New Revision: 166620
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=166620&view=rev
> Log:
> LoopVectorizer: Add a basic cost model which uses the VTTI interface.
> 
> Added:
>     llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll
> Modified:
>     llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>     llvm/trunk/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
>     llvm/trunk/test/Transforms/LoopVectorize/cpp-new-array.ll
>     llvm/trunk/test/Transforms/LoopVectorize/gcc-examples.ll
>     llvm/trunk/test/Transforms/LoopVectorize/increment.ll
>     llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll
>     llvm/trunk/test/Transforms/LoopVectorize/non-const-n.ll
>     llvm/trunk/test/Transforms/LoopVectorize/read-only.ll
>     llvm/trunk/test/Transforms/LoopVectorize/reduction.ll
>     llvm/trunk/test/Transforms/LoopVectorize/scalar-select.ll
> 
> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=166620&r1=166619&r2=166620&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed Oct 24
> 15:36:32 2012
> @@ -18,10 +18,13 @@
>  //
>  // This pass has three parts:
>  // 1. The main loop pass that drives the different parts.
> -// 2. LoopVectorizationLegality - A helper class that checks for the
> legality
> +// 2. LoopVectorizationLegality - A unit that checks for the
> legality
>  //    of the vectorization.
> -// 3. SingleBlockLoopVectorizer - A helper class that performs the
> actual
> +// 3. SingleBlockLoopVectorizer - A unit that performs the actual
>  //    widening of instructions.
> +// 4. LoopVectorizationCostModel - A unit that checks for the
> profitability
> +//    of vectorization. It decides on the optimal vector width,
> which
> +//    can be one, if vectorization is not profitable.
>  //===----------------------------------------------------------------------===//
>  //
>  // The reduction-variable vectorization is based on the paper:
> @@ -51,13 +54,14 @@
>  #include "llvm/ADT/StringExtras.h"
>  #include "llvm/Analysis/AliasAnalysis.h"
>  #include "llvm/Analysis/AliasSetTracker.h"
> -#include "llvm/Transforms/Scalar.h"
>  #include "llvm/Analysis/ScalarEvolution.h"
>  #include "llvm/Analysis/ScalarEvolutionExpressions.h"
>  #include "llvm/Analysis/ScalarEvolutionExpander.h"
> -#include "llvm/Transforms/Utils/BasicBlockUtils.h"
> -#include "llvm/Analysis/ValueTracking.h"
>  #include "llvm/Analysis/LoopInfo.h"
> +#include "llvm/Analysis/ValueTracking.h"
> +#include "llvm/Transforms/Scalar.h"
> +#include "llvm/Transforms/Utils/BasicBlockUtils.h"
> +#include "llvm/TargetTransformInfo.h"
>  #include "llvm/Support/CommandLine.h"
>  #include "llvm/Support/Debug.h"
>  #include "llvm/Support/raw_ostream.h"
> @@ -67,13 +71,14 @@
>  using namespace llvm;
>  
>  static cl::opt<unsigned>
> -DefaultVectorizationFactor("default-loop-vectorize-width",
> -                          cl::init(4), cl::Hidden,
> -                          cl::desc("Set the default loop
> vectorization width"));
> +VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
> +          cl::desc("Set the default vectorization width. Zero is
> autoselect."));
> +
>  namespace {
>  
> -// Forward declaration.
> +// Forward declarations.
>  class LoopVectorizationLegality;
> +class LoopVectorizationCostModel;
>  
>  /// SingleBlockLoopVectorizer vectorizes loops which contain only
>  one basic
>  /// block to a specified vectorization factor (VF).
> @@ -229,11 +234,10 @@
>    /// of the reductions that were found in the loop.
>    typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
>  
> -  /// Returns the maximum vectorization factor that we *can* use to
> vectorize
> -  /// this loop. This does not mean that it is profitable to
> vectorize this
> -  /// loop, only that it is legal to do so. This may be a large
> number. We
> -  /// can vectorize to any SIMD width below this number.
> -  unsigned getLoopMaxVF();
> +  /// Returns true if it is legal to vectorize this loop.
> +  /// This does not mean that it is profitable to vectorize this
> +  /// loop, only that it is legal to do so.
> +  bool canVectorize();
>  
>    /// Returns the Induction variable.
>    PHINode *getInduction() {return Induction;}
> @@ -286,6 +290,49 @@
>    SmallPtrSet<Value*, 4> AllowedExit;
>  };
>  
> +/// LoopVectorizationCostModel - estimates the expected speedups due
> to
> +/// vectorization.
> +/// In many cases vectorization is not profitable. This can happen
> because
> +/// of a number of reasons. In this class we mainly attempt to
> predict
> +/// the expected speedup/slowdowns due to the supported instruction
> set.
> +/// We use the VectorTargetTransformInfo to query the different
> backends
> +/// for the cost of different operations.
> +class LoopVectorizationCostModel {
> +public:
> +  /// C'tor.
> +  LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
> DataLayout *Dl,
> +                             LoopVectorizationLegality *Leg,
> +                             const VectorTargetTransformInfo *Vtti):
> +  TheLoop(Lp), SE(Se), DL(Dl), Legal(Leg), VTTI(Vtti) { }
> +
> +  /// Returns the most profitable vectorization factor for the loop
> that is
> +  /// smaller or equal to the VF argument. This method checks every
> power
> +  /// of two up to VF.
> +  unsigned findBestVectorizationFactor(unsigned VF = 4);
> +
> +private:
> +  /// Returns the expected execution cost. The unit of the cost does
> +  /// not matter because we use the 'cost' units to compare
> different
> +  /// vector widths. The cost that is returned is *not* normalized
> by
> +  /// the factor width.
> +  unsigned expectedCost(unsigned VF);
> +
> +  /// Returns the execution time cost of an instruction for a given
> vector
> +  /// width. Vector width of one means scalar.
> +  unsigned getInstructionCost(Instruction *I, unsigned VF);
> +
> +  /// The loop that we evaluate.
> +  Loop *TheLoop;
> +  /// Scev analysis.
> +  ScalarEvolution *SE;
> +  /// DataLayout analysis.
> +  DataLayout *DL;
> +  /// Vectorization legality.
> +  LoopVectorizationLegality *Legal;
> +  /// Vector target information.
> +  const VectorTargetTransformInfo *VTTI;
> +};
> +
>  struct LoopVectorize : public LoopPass {
>    static char ID; // Pass identification, replacement for typeid
>  
> @@ -296,6 +343,7 @@
>    ScalarEvolution *SE;
>    DataLayout *DL;
>    LoopInfo *LI;
> +  TargetTransformInfo *TTI;
>  
>    virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
>      // We only vectorize innermost loops.
> @@ -305,25 +353,42 @@
>      SE = &getAnalysis<ScalarEvolution>();
>      DL = getAnalysisIfAvailable<DataLayout>();
>      LI = &getAnalysis<LoopInfo>();
> +    TTI = getAnalysisIfAvailable<TargetTransformInfo>();
>  
>      DEBUG(dbgs() << "LV: Checking a loop in \"" <<
>            L->getHeader()->getParent()->getName() << "\"\n");
>  
>      // Check if it is legal to vectorize the loop.
>      LoopVectorizationLegality LVL(L, SE, DL);
> -    unsigned MaxVF = LVL.getLoopMaxVF();
> -
> -    // Check that we can vectorize this loop using the chosen
> vectorization
> -    // width.
> -    if (MaxVF < DefaultVectorizationFactor) {
> -      DEBUG(dbgs() << "LV: non-vectorizable MaxVF ("<< MaxVF <<
> ").\n");
> +    if (!LVL.canVectorize()) {
> +      DEBUG(dbgs() << "LV: Not vectorizing.\n");
>        return false;
>      }
>  
> -    DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< MaxVF <<
> ").\n");
> +    // Select the preffered vectorization factor.
> +    unsigned VF = 1;
> +    if (VectorizationFactor == 0) {
> +      const VectorTargetTransformInfo *VTTI = 0;
> +      if (TTI)
> +        VTTI = TTI->getVectorTargetTransformInfo();
> +      // Use the cost model.
> +      LoopVectorizationCostModel CM(L, SE, DL, &LVL, VTTI);
> +      VF = CM.findBestVectorizationFactor();
> +
> +      if (VF == 1) {
> +        DEBUG(dbgs() << "LV: Vectorization is possible but not
> beneficial.\n");
> +        return false;
> +      }
> +
> +    } else {
> +      // Use the user command flag.
> +      VF = VectorizationFactor;
> +    }
> +
> +    DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF <<
> ").\n");
>  
>      // If we decided that it is *legal* to vectorizer the loop then
>      do it.
> -    SingleBlockLoopVectorizer LB(L, SE, LI, &LPM,
> DefaultVectorizationFactor);
> +    SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, VF);
>      LB.vectorize(&LVL);
>  
>      DEBUG(verifyFunction(*L->getHeader()->getParent()));
> @@ -656,6 +721,13 @@
>  
>  void
>  SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality
>  *Legal) {
> +  //===------------------------------------------------===//
> +  //
> +  // Notice: any optimization or new instruction that go
> +  // into the code below should be also be implemented in
> +  // the cost-model.
> +  //
> +  //===------------------------------------------------===//
>    typedef SmallVector<PHINode*, 4> PhiVector;
>    BasicBlock &BB = *OrigLoop->getHeader();
>    Constant *Zero = ConstantInt::get(
> @@ -957,18 +1029,18 @@
>    SE->forgetLoop(OrigLoop);
>  }
>  
> -unsigned LoopVectorizationLegality::getLoopMaxVF() {
> +bool LoopVectorizationLegality::canVectorize() {
>    if (!TheLoop->getLoopPreheader()) {
>      assert(false && "No preheader!!");
>      DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
> -    return  1;
> +    return  false;
>    }
>  
>    // We can only vectorize single basic block loops.
>    unsigned NumBlocks = TheLoop->getNumBlocks();
>    if (NumBlocks != 1) {
>      DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n");
> -    return 1;
> +    return false;
>    }
>  
>    // We need to have a loop header.
> @@ -978,22 +1050,22 @@
>    // Go over each instruction and look at memory deps.
>    if (!canVectorizeBlock(*BB)) {
>      DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
> -    return 1;
> +    return false;
>    }
>  
>    // ScalarEvolution needs to be able to find the exit count.
>    const SCEV *ExitCount = SE->getExitCount(TheLoop, BB);
>    if (ExitCount == SE->getCouldNotCompute()) {
>      DEBUG(dbgs() << "LV: SCEV could not compute the loop exit
>      count.\n");
> -    return 1;
> +    return false;
>    }
>  
>    DEBUG(dbgs() << "LV: We can vectorize this loop!\n");
>  
>    // Okay! We can vectorize. At this point we don't have any other
>    mem analysis
> -  // which may limit our maximum vectorization factor, so just
> return the
> -  // maximum SIMD size.
> -  return DefaultVectorizationFactor;
> +  // which may limit our maximum vectorization factor, so just
> return true with
> +  // no restrictions.
> +  return true;
>  }
>  
>  bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
> @@ -1323,6 +1395,177 @@
>    return true;
>  }
>  
> +unsigned
> +LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF)
> {
> +  if (!VTTI) {
> +    DEBUG(dbgs() << "LV: No vector target information. Not
> vectorizing. \n");
> +    return 1;
> +  }
> +
> +  float Cost = expectedCost(1);
> +  unsigned Width = 1;
> +  DEBUG(dbgs() << "LV: Scalar loop costs: "<< (int)Cost << ".\n");
> +  for (unsigned i=2; i <= VF; i*=2) {
> +    // Notice that the vector loop needs to be executed less times,
> so
> +    // we need to divide the cost of the vector loops by the width
> of
> +    // the vector elements.
> +    float VectorCost = expectedCost(i) / (float)i;
> +    DEBUG(dbgs() << "LV: Vector loop of width "<< i << " costs: " <<
> +          (int)VectorCost << ".\n");
> +    if (VectorCost < Cost) {
> +      Cost = VectorCost;
> +      Width = i;
> +    }
> +  }
> +
> +  DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
> +  return Width;
> +}
> +
> +unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
> +  // We can only estimate the cost of single basic block loops.
> +  assert(1 == TheLoop->getNumBlocks() && "Too many blocks in loop");
> +
> +  BasicBlock *BB = TheLoop->getHeader();
> +  unsigned Cost = 0;
> +
> +  // For each instruction in the old loop.
> +  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it !=
> e; ++it) {
> +    Instruction *Inst = it;
> +    Cost += getInstructionCost(Inst, VF);
> +  }
> +
> +  // Return the cost divided by VF, because we will be executing
> +  // less iterations of the vector form.
> +  return Cost;
> +}
> +
> +unsigned
> +LoopVectorizationCostModel::getInstructionCost(Instruction *I,
> unsigned VF) {
> +  assert(VTTI && "Invalid vector target transformation info");
> +  switch (I->getOpcode()) {
> +    case Instruction::Br: {
> +      return VTTI->getInstrCost(I->getOpcode());
> +    }
> +    case Instruction::PHI:
> +      // PHIs are handled the same as the binary instructions below.
> +    case Instruction::Add:
> +    case Instruction::FAdd:
> +    case Instruction::Sub:
> +    case Instruction::FSub:
> +    case Instruction::Mul:
> +    case Instruction::FMul:
> +    case Instruction::UDiv:
> +    case Instruction::SDiv:
> +    case Instruction::FDiv:
> +    case Instruction::URem:
> +    case Instruction::SRem:
> +    case Instruction::FRem:
> +    case Instruction::Shl:
> +    case Instruction::LShr:
> +    case Instruction::AShr:
> +    case Instruction::And:
> +    case Instruction::Or:
> +    case Instruction::Xor: {
> +      Type *VTy = VectorType::get(I->getType(), VF);
> +          return VTTI->getInstrCost(I->getOpcode(), VTy);
> +    }
> +    case Instruction::Select: {
> +      SelectInst *SI = cast<SelectInst>(I);
> +      Type *VTy = VectorType::get(I->getType(), VF);
> +      const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
> +      bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
> +      Type *CondTy = SI->getCondition()->getType();
> +        if (ScalarCond)
> +          CondTy = VectorType::get(CondTy, VF);
> +
> +      return VTTI->getInstrCost(I->getOpcode(), VTy, CondTy);
> +    }
> +    case Instruction::ICmp:
> +    case Instruction::FCmp: {
> +      Type *VTy = VectorType::get(I->getOperand(0)->getType(), VF);
> +      return VTTI->getInstrCost(I->getOpcode(), VTy);
> +    }
> +    case Instruction::Store: {
> +      StoreInst *SI = cast<StoreInst>(I);
> +      Type *VTy = VectorType::get(SI->getValueOperand()->getType(),
> VF);
> +
> +      // Scalarized stores.
> +      if (!Legal->isConsecutiveGep(SI->getPointerOperand())) {
> +        unsigned Cost = 0;
> +        unsigned ExtCost =
> VTTI->getInstrCost(Instruction::ExtractElement, VTy);
> +        // The cost of extracting from the vector value.
> +        Cost += VF * ExtCost;
> +        // The cost of the scalar stores.
> +        Cost += VF * VTTI->getInstrCost(I->getOpcode(),
> VTy->getScalarType());

Why are you using getInstrCost here instead of getMemoryOpCost? I'd think that the alignment and address-space information could be important.

> +        return Cost;
> +      }
> +
> +      // Wide stores.
> +      return VTTI->getMemoryOpCost(I->getOpcode(), VTy,
> SI->getAlignment(),
> +                                   SI->getPointerAddressSpace());
> +    }
> +    case Instruction::Load: {
> +      LoadInst *LI = cast<LoadInst>(I);
> +      Type *VTy = VectorType::get(I->getType(), VF);
> +
> +      // Scalarized loads.
> +      if (!Legal->isConsecutiveGep(LI->getPointerOperand())) {
> +        unsigned Cost = 0;
> +        unsigned InCost =
> VTTI->getInstrCost(Instruction::InsertElement, VTy);
> +        // The cost of inserting the loaded value into the result
> vector.
> +        Cost += VF * InCost;
> +        // The cost of the scalar stores.
> +        Cost += VF * VTTI->getInstrCost(I->getOpcode(),
> VTy->getScalarType());

[Same comment as for the scalarized stores: why use getInstrCost here rather than getMemoryOpCost?]

> +        return Cost;
> +      }
> +
> +      // Wide loads.
> +      return VTTI->getMemoryOpCost(I->getOpcode(), VTy,
> LI->getAlignment(),
> +                                   LI->getPointerAddressSpace());
> +    }
> +    case Instruction::ZExt:
> +    case Instruction::SExt:
> +    case Instruction::FPToUI:
> +    case Instruction::FPToSI:
> +    case Instruction::FPExt:
> +    case Instruction::PtrToInt:
> +    case Instruction::IntToPtr:
> +    case Instruction::SIToFP:
> +    case Instruction::UIToFP:
> +    case Instruction::Trunc:
> +    case Instruction::FPTrunc:
> +    case Instruction::BitCast: {
> +      Type *SrcTy = VectorType::get(I->getOperand(0)->getType(),
> VF);
> +      Type *DstTy = VectorType::get(I->getType(), VF);
> +      return VTTI->getInstrCost(I->getOpcode(), DstTy, SrcTy);
> +    }
> +    default: {
> +      // We are scalarizing the instruction. Return the cost of the
> scalar
> +      // instruction, plus the cost of insert and extract into
> vector
> +      // elements, times the vector width.
> +      unsigned Cost = 0;
> +      Type *Ty = I->getType();
> +
> +      if (!Ty->isVoidTy()) {
> +        Type *VTy = VectorType::get(Ty, VF);
> +        unsigned InsCost =
> VTTI->getInstrCost(Instruction::InsertElement, VTy);
> +        unsigned ExtCost =
> VTTI->getInstrCost(Instruction::ExtractElement, VTy);
> +        Cost += VF * (InsCost + ExtCost);
> +      }
> +
> +      /// We don't have any information on the scalar instruction,
> but maybe
> +      /// the target has.
> +      /// TODO: This may be a target-specific intrinsic.
> +      /// Need to add API for that.

Yep ;) -- I'd think that something like getInstrCost, but taking an intrinsic ID and a type, would handle most cases.

Thanks again,
Hal

> +      Cost += VF * VTTI->getInstrCost(I->getOpcode(), Ty);
> +
> +      return Cost;
> +    }
> +  }// end of switch.
> +}
> +
> +
>  } // namespace
>  
>  char LoopVectorize::ID = 0;
> 
> Modified:
> llvm/trunk/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/2012-10-20-infloop.ll?rev=166620&r1=166619&r2=166620&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
> (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
> Wed Oct 24 15:36:32 2012
> @@ -1,4 +1,4 @@
> -; RUN: opt < %s  -loop-vectorize -dce
> +; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce
>  
>  ; Check that we don't fall into an infinite loop.
>  define void @test() nounwind {
> 
> Added: llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll?rev=166620&view=auto
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll (added)
> +++ llvm/trunk/test/Transforms/LoopVectorize/cost-model.ll Wed Oct 24
> 15:36:32 2012
> @@ -0,0 +1,40 @@
> +; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0
> -S | FileCheck %s
> +
> +target datalayout =
> "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
> +target triple = "x86_64-apple-macosx10.8.0"
> +
> + at c = common global [2048 x i32] zeroinitializer, align 16
> + at b = common global [2048 x i32] zeroinitializer, align 16
> + at d = common global [2048 x i32] zeroinitializer, align 16
> + at a = common global [2048 x i32] zeroinitializer, align 16
> +
> +; At this point the cost model is pretty bad and we are vectorizing
> the code below.
> +; TODO: This code should not be vectorized on x86.
> +;CHECK: cost_model_1
> +;CHECK: <4 x i32>
> +;CHECK: ret void
> +define void @cost_model_1() nounwind uwtable noinline ssp {
> +entry:
> +  br label %for.body
> +
> +for.body:                                         ; preds =
> %for.body, %entry
> +  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body
> ]
> +  %0 = shl nsw i64 %indvars.iv, 1
> +  %arrayidx = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %0
> +  %1 = load i32* %arrayidx, align 8
> +  %idxprom1 = sext i32 %1 to i64
> +  %arrayidx2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64
> %idxprom1
> +  %2 = load i32* %arrayidx2, align 4
> +  %arrayidx4 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64
> %indvars.iv
> +  %3 = load i32* %arrayidx4, align 4
> +  %idxprom5 = sext i32 %3 to i64
> +  %arrayidx6 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64
> %idxprom5
> +  store i32 %2, i32* %arrayidx6, align 4
> +  %indvars.iv.next = add i64 %indvars.iv, 1
> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
> +  %exitcond = icmp eq i32 %lftr.wideiv, 256
> +  br i1 %exitcond, label %for.end, label %for.body
> +
> +for.end:                                          ; preds =
> %for.body
> +  ret void
> +}
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/cpp-new-array.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/cpp-new-array.ll?rev=166620&r1=166619&r2=166620&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/cpp-new-array.ll
> (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/cpp-new-array.ll Wed Oct
> 24 15:36:32 2012
> @@ -1,4 +1,4 @@
> -; RUN: opt < %s  -loop-vectorize -dce -instcombine -licm -S |
> FileCheck %s
> +; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce
> -instcombine -licm -S | FileCheck %s
>  
>  target datalayout =
>  "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>  target triple = "x86_64-apple-macosx10.8.0"
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/gcc-examples.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/gcc-examples.ll?rev=166620&r1=166619&r2=166620&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/gcc-examples.ll
> (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/gcc-examples.ll Wed Oct
> 24 15:36:32 2012
> @@ -1,4 +1,4 @@
> -; RUN: opt < %s  -loop-vectorize -dce -instcombine -licm -S |
> FileCheck %s
> +; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce
> -instcombine -licm -S | FileCheck %s
>  
>  target datalayout =
>  "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>  target triple = "x86_64-apple-macosx10.8.0"
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/increment.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/increment.ll?rev=166620&r1=166619&r2=166620&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/increment.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/increment.ll Wed Oct 24
> 15:36:32 2012
> @@ -1,4 +1,4 @@
> -; RUN: opt < %s  -loop-vectorize -dce -instcombine -licm -S |
> FileCheck %s
> +; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce
> -instcombine -licm -S | FileCheck %s
>  
>  target datalayout =
>  "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>  target triple = "x86_64-apple-macosx10.8.0"
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll?rev=166620&r1=166619&r2=166620&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll
> (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/induction_plus.ll Wed
> Oct 24 15:36:32 2012
> @@ -1,4 +1,4 @@
> -; RUN: opt < %s -loop-vectorize -instcombine -S | FileCheck %s
> +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine
> -S | FileCheck %s
>  
>  target datalayout =
>  "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>  target triple = "x86_64-apple-macosx10.8.0"
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/non-const-n.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/non-const-n.ll?rev=166620&r1=166619&r2=166620&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/non-const-n.ll
> (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/non-const-n.ll Wed Oct
> 24 15:36:32 2012
> @@ -1,4 +1,4 @@
> -; RUN: opt < %s  -loop-vectorize -dce -instcombine -licm -S |
> FileCheck %s
> +; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce
> -instcombine -licm -S | FileCheck %s
>  
>  target datalayout =
>  "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>  target triple = "x86_64-apple-macosx10.8.0"
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/read-only.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/read-only.ll?rev=166620&r1=166619&r2=166620&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/read-only.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/read-only.ll Wed Oct 24
> 15:36:32 2012
> @@ -1,4 +1,4 @@
> -; RUN: opt < %s  -loop-vectorize -dce -instcombine -licm -S |
> FileCheck %s
> +; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce
> -instcombine -licm -S | FileCheck %s
>  
>  target datalayout =
>  "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>  target triple = "x86_64-apple-macosx10.8.0"
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/reduction.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/reduction.ll?rev=166620&r1=166619&r2=166620&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/reduction.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/reduction.ll Wed Oct 24
> 15:36:32 2012
> @@ -1,4 +1,4 @@
> -; RUN: opt < %s  -loop-vectorize -dce -instcombine -licm -S |
> FileCheck %s
> +; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce
> -instcombine -licm -S | FileCheck %s
>  
>  target datalayout =
>  "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>  target triple = "x86_64-apple-macosx10.8.0"
> 
> Modified: llvm/trunk/test/Transforms/LoopVectorize/scalar-select.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/scalar-select.ll?rev=166620&r1=166619&r2=166620&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/scalar-select.ll
> (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/scalar-select.ll Wed Oct
> 24 15:36:32 2012
> @@ -1,4 +1,4 @@
> -; RUN: opt < %s  -loop-vectorize -dce -instcombine -licm -S |
> FileCheck %s
> +; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce
> -instcombine -licm -S | FileCheck %s
>  
>  target datalayout =
>  "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>  target triple = "x86_64-apple-macosx10.8.0"
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 

-- 
Hal Finkel
Postdoctoral Appointee
Leadership Computing Facility
Argonne National Laboratory