[llvm] r182508 - SLPVectorizer: Change the order in which new instructions are added to the function.

Nadav Rotem nrotem at apple.com
Wed May 22 12:47:33 PDT 2013


Author: nadav
Date: Wed May 22 14:47:32 2013
New Revision: 182508

URL: http://llvm.org/viewvc/llvm-project?rev=182508&view=rev
Log:
SLPVectorizer: Change the order in which new instructions are added to the function.
We are not working on a DAG and I ran into a number of problems when I enabled the vectorization of 'diamond-trees' (trees that share leaves).
* Improved the numbering API.
* Changed the placement of new instructions to the last root.
* Fixed a bug with external tree users with non-zero lane.
* Fixed a bug in the placement of in-tree users.


Added:
    llvm/trunk/test/Transforms/SLPVectorizer/X86/crash_povray.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
    llvm/trunk/test/Transforms/SLPVectorizer/X86/ordering.ll
Modified:
    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp
    llvm/trunk/lib/Transforms/Vectorize/VecUtils.h
    llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll

Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=182508&r1=182507&r2=182508&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Wed May 22 14:47:32 2013
@@ -20,6 +20,7 @@
 
 #include "VecUtils.h"
 #include "llvm/Transforms/Vectorize.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -47,7 +48,7 @@ namespace {
 
 /// The SLPVectorizer Pass.
 struct SLPVectorizer : public FunctionPass {
-  typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
+  typedef MapVector<Value*, BoUpSLP::StoreList> StoreListMap;
 
   /// Pass identification, replacement for typeid
   static char ID;

Modified: llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp?rev=182508&r1=182507&r2=182508&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp (original)
+++ llvm/trunk/lib/Transforms/Vectorize/VecUtils.cpp Wed May 22 14:47:32 2013
@@ -46,7 +46,7 @@ namespace llvm {
 
 BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
                  TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
-  BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp)  {
+  Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
   numberInstructions();
 }
 
@@ -121,6 +121,7 @@ bool BoUpSLP::vectorizeStoreChain(ArrayR
     DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
     if (Cost < CostThreshold) {
       DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
+      Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands,VF)));
       vectorizeTree(Operands, VF);
       i += VF - 1;
       Changed = true;
@@ -131,7 +132,7 @@ bool BoUpSLP::vectorizeStoreChain(ArrayR
 }
 
 bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
-  ValueSet Heads, Tails;
+  SetVector<Value*> Heads, Tails;
   SmallDenseMap<Value*, Value*> ConsecutiveChain;
 
   // We may run into multiple chains that merge into a single chain. We mark the
@@ -152,7 +153,8 @@ bool BoUpSLP::vectorizeStores(ArrayRef<S
     }
 
   // For stores that start but don't end a link in the chain:
-  for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
+  for (SetVector<Value*>::iterator it = Heads.begin(), e = Heads.end();
+       it != e; ++it) {
     if (Tails.count(*it)) continue;
 
     // We found a store instr that starts a chain. Now follow the chain and try
@@ -224,9 +226,14 @@ Value *BoUpSLP::isUnsafeToSink(Instructi
 }
 
 void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
+  int LastIdx = getLastIndex(Operands, Operands.size());
+  Instruction *Loc = getInsertionPoint(LastIdx);
+  Builder.SetInsertPoint(Loc);
+
+  assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx  &&
+         "Vectorizing with in-tree users");
+
   Value *Vec = vectorizeTree(Operands, Operands.size());
-  BasicBlock::iterator Loc = cast<Instruction>(Vec);
-  IRBuilder<> Builder(++Loc);
   // After vectorizing the operands we need to generate extractelement
   // instructions and replace all of the uses of the scalar values with
   // the values that we extracted from the vectorized tree.
@@ -246,7 +253,13 @@ int BoUpSLP::getTreeCost(ArrayRef<Value
   MustExtract.clear();
 
   // Find the location of the last root.
-  unsigned LastRootIndex = InstrIdx[GetLastInstr(VL, VL.size())];
+  int LastRootIndex = getLastIndex(VL, VL.size());
+  int FirstUserIndex = getFirstUserIndex(VL, VL.size());
+
+  // Don't vectorize if there are users of the tree roots inside the tree
+  // itself.
+  if (LastRootIndex > FirstUserIndex)
+    return max_cost;
 
   // Scan the tree and find which value is used by which lane, and which values
   // must be scalarized.
@@ -254,7 +267,7 @@ int BoUpSLP::getTreeCost(ArrayRef<Value
 
   // Check that instructions with multiple users can be vectorized. Mark unsafe
   // instructions.
-  for (ValueSet::iterator it = MultiUserVals.begin(),
+  for (SetVector<Value*>::iterator it = MultiUserVals.begin(),
        e = MultiUserVals.end(); it != e; ++it) {
     // Check that all of the users of this instr are within the tree
     // and that they are all from the same lane.
@@ -267,18 +280,21 @@ int BoUpSLP::getTreeCost(ArrayRef<Value
         // We don't have an ordering problem if the user is not in this basic
         // block.
         Instruction *Inst = cast<Instruction>(*I);
-        if (Inst->getParent() == BB) {
-          // We don't have an ordering problem if the user is after the last
-          // root.
-          unsigned Idx = InstrIdx[Inst];
-          if (Idx < LastRootIndex) {
-            MustScalarize.insert(*it);
-            DEBUG(dbgs()<<"SLP: Adding to MustScalarize "
-                  "because of an unsafe out of tree usage.\n");
-            break;
-          }
+        if (Inst->getParent() != BB) {
+          MustExtract.insert(*it);
+          continue;
+        }
+
+        // We don't have an ordering problem if the user is after the last root.
+        int Idx = InstrIdx[Inst];
+        if (Idx < LastRootIndex) {
+          MustScalarize.insert(*it);
+          DEBUG(dbgs()<<"SLP: Adding to MustScalarize "
+                "because of an unsafe out of tree usage.\n");
+          break;
         }
 
+
         DEBUG(dbgs()<<"SLP: Adding to MustExtract "
               "because of a safe out of tree usage.\n");
         MustExtract.insert(*it);
@@ -332,14 +348,6 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<V
     if (!I || Opcode != I->getOpcode()) return;
   }
 
-  // Mark instructions with multiple users.
-  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
-    Instruction *I = dyn_cast<Instruction>(VL[i]);
-    // Remember to check if all of the users of this instr are vectorized
-    // within our tree.
-    if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
-  }
-
   for (int i = 0, e = VL.size(); i < e; ++i) {
     // Check that the instruction is only used within
     // one lane.
@@ -348,6 +356,19 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<V
     LaneMap[VL[i]] = i;
   }
 
+  // Mark instructions with multiple users.
+  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    // Remember to check if all of the users of this instr are vectorized
+    // within our tree. At depth zero we have no local users, only external
+    // users that we don't care about.
+    if (Depth && I && I->getNumUses() > 1) {
+      DEBUG(dbgs()<<"SLP: Adding to MultiUserVals "
+            "because it has multiple users:" << *I << " \n");
+      MultiUserVals.insert(I);
+    }
+  }
+
   switch (Opcode) {
     case Instruction::ZExt:
     case Instruction::SExt:
@@ -461,11 +482,9 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Va
 
   // Check if it is safe to sink the loads or the stores.
   if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
-    int MaxIdx = InstrIdx[VL0];
-    for (unsigned i = 1, e = VL.size(); i < e; ++i )
-      MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
-
+    int MaxIdx = getLastIndex(VL, VL.size());
     Instruction *Last = InstrVec[MaxIdx];
+
     for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
       if (VL[i] == Last) continue;
       Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
@@ -592,15 +611,40 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Va
   }
 }
 
-Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) {
+int BoUpSLP::getLastIndex(ArrayRef<Value *> VL, unsigned VF) {
   int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
   for (unsigned i = 0; i < VF; ++i )
     MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
-  return InstrVec[MaxIdx + 1];
+  return MaxIdx;
+}
+
+int BoUpSLP::getFirstUserIndex(ArrayRef<Value *> VL, unsigned VF) {
+  // Find the first user of the values.
+  int FirstUser = InstrVec.size();
+  for (unsigned i = 0; i < VF; ++i) {
+    for (Value::use_iterator U = VL[i]->use_begin(), UE = VL[i]->use_end();
+         U != UE; ++U) {
+      Instruction *Instr = dyn_cast<Instruction>(*U);
+      if (!Instr || Instr->getParent() != BB)
+        continue;
+
+      FirstUser = std::min(FirstUser, InstrIdx[Instr]);
+    }
+  }
+  return FirstUser;
+}
+
+int BoUpSLP::getLastIndex(Instruction *I, Instruction *J) {
+  assert(I->getParent() == BB && "Invalid parent for instruction I");
+  assert(J->getParent() == BB && "Invalid parent for instruction J");
+  return std::max(InstrIdx[I],InstrIdx[J]);
+}
+
+Instruction *BoUpSLP::getInsertionPoint(unsigned Index) {
+  return InstrVec[Index + 1];
 }
 
 Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
-  IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
   Value *Vec = UndefValue::get(Ty);
   for (unsigned i=0; i < Ty->getNumElements(); ++i) {
     // Generate the 'InsertElement' instruction.
@@ -620,18 +664,26 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value
 Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
   Value *V = vectorizeTree_rec(VL, VF);
 
-  Instruction *LastInstr = GetLastInstr(VL, VL.size());
-  int LastInstrIdx = InstrIdx[LastInstr];
-  IRBuilder<> Builder(LastInstr);
-  for (ValueSet::iterator it = MustExtract.begin(), e = MustExtract.end();
-       it != e; ++it) {
+  int LastInstrIdx = getLastIndex(VL, VL.size());
+  for (SetVector<Value*>::iterator it = MustExtract.begin(),
+       e = MustExtract.end(); it != e; ++it) {
     Instruction *I = cast<Instruction>(*it);
+
+    // This is a scalarized value, so we can use the original value.
+    // No need to extract from the vector.
+    if (!LaneMap.count(I))
+      continue;
+
     Value *Vec = VectorizedValues[I];
-    assert(LaneMap.count(I) && "Unable to find the lane for the external use");
+    // We decided not to vectorize I because one of its users was not
+    // vectorized. This is okay.
+    if (!Vec)
+      continue;
+
     Value *Idx = Builder.getInt32(LaneMap[I]);
     Value *Extract = Builder.CreateExtractElement(Vec, Idx);
     bool Replaced = false;
-    for (Value::use_iterator U = I->use_begin(), UE = U->use_end(); U != UE;
+    for (Value::use_iterator U = I->use_begin(), UE = I->use_end(); U != UE;
          ++U) {
       Instruction *UI = cast<Instruction>(*U);
       if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx)
@@ -670,19 +722,27 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayR
   }
 
   // Check that this is a simple vector constant.
-  if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
+  if (AllConst || AllSameScalar)
+    return Scalarize(VL, VecTy);
 
   // Scalarize unknown structures.
   Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
-  if (!VL0) return Scalarize(VL, VecTy);
+  if (!VL0)
+    return Scalarize(VL, VecTy);
 
-  if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];
+  if (VectorizedValues.count(VL0)) {
+    Value * Vec = VectorizedValues[VL0];
+    for (int i = 0; i < VF; ++i)
+      VectorizedValues[VL[i]] = Vec;
+    return Vec;
+  }
 
   unsigned Opcode = VL0->getOpcode();
   for (unsigned i = 0, e = VF; i < e; ++i) {
     Instruction *I = dyn_cast<Instruction>(VL[i]);
     // If not all of the instructions are identical then we have to scalarize.
-    if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
+    if (!I || Opcode != I->getOpcode())
+      return Scalarize(VL, VecTy);
   }
 
   switch (Opcode) {
@@ -702,7 +762,6 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayR
     for (int i = 0; i < VF; ++i)
       INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
     Value *InVec = vectorizeTree_rec(INVL, VF);
-    IRBuilder<> Builder(GetLastInstr(VL, VF));
     CastInst *CI = dyn_cast<CastInst>(VL0);
     Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
 
@@ -737,7 +796,6 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayR
 
     Value *LHS = vectorizeTree_rec(LHSVL, VF);
     Value *RHS = vectorizeTree_rec(RHSVL, VF);
-    IRBuilder<> Builder(GetLastInstr(VL, VF));
     BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
     Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS,RHS);
 
@@ -755,10 +813,13 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayR
       if (!isConsecutiveAccess(VL[i-1], VL[i]))
         return Scalarize(VL, VecTy);
 
-    IRBuilder<> Builder(GetLastInstr(VL, VF));
-    Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
-                                          VecTy->getPointerTo());
-    LI = Builder.CreateLoad(VecPtr);
+    // Loads are inserted at the head of the tree because we don't want to sink
+    // them all the way down past store instructions.
+    Instruction *Loc = getInsertionPoint(getLastIndex(VL, VL.size()));
+    IRBuilder<> LoadBuilder(Loc);
+    Value *VecPtr = LoadBuilder.CreateBitCast(LI->getPointerOperand(),
+                                              VecTy->getPointerTo());
+    LI = LoadBuilder.CreateLoad(VecPtr);
     LI->setAlignment(Alignment);
 
     for (int i = 0; i < VF; ++i)
@@ -775,8 +836,6 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayR
       ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
 
     Value *VecValue = vectorizeTree_rec(ValueOp, VF);
-
-    IRBuilder<> Builder(GetLastInstr(VL, VF));
     Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
                                           VecTy->getPointerTo());
     Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);

Modified: llvm/trunk/lib/Transforms/Vectorize/VecUtils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/VecUtils.h?rev=182508&r1=182507&r2=182508&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Vectorize/VecUtils.h (original)
+++ llvm/trunk/lib/Transforms/Vectorize/VecUtils.h Wed May 22 14:47:32 2013
@@ -16,9 +16,11 @@
 #define LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/IR/IRBuilder.h"
 #include <vector>
 
 namespace llvm {
@@ -107,9 +109,19 @@ private:
   /// \returns the pointer to the barrier instruction if we can't sink.
   Value *isUnsafeToSink(Instruction *Src, Instruction *Dst);
 
-  /// \returns the instruction that appears last in the BB from \p VL.
+  /// \returns the index of the last instruction in the BB from \p VL.
   /// Only consider the first \p VF elements.
-  Instruction *GetLastInstr(ArrayRef<Value *> VL, unsigned VF);
+  int getLastIndex(ArrayRef<Value *> VL, unsigned VF);
+
+  /// \returns the index of the first User of \p VL.
+  /// Only consider the first \p VF elements.
+  int getFirstUserIndex(ArrayRef<Value *> VL, unsigned VF);
+
+  /// \returns the index of the instruction \p I or \p J that appears last in the BB.
+  int getLastIndex(Instruction *I, Instruction *J);
+
+  /// \returns the insertion point for \p Index.
+  Instruction *getInsertionPoint(unsigned Index);
 
   /// \returns a vector from a collection of scalars in \p VL.
   Value *Scalarize(ArrayRef<Value *> VL, VectorType *Ty);
@@ -130,17 +142,17 @@ private:
   /// Contains values that have users outside of the vectorized graph.
   /// We need to generate extract instructions for these values.
   /// NOTICE: The vectorization methods also use this set.
-  ValueSet MustExtract;
+  SetVector<Value*> MustExtract;
 
   /// Contains a list of values that are used outside the current tree. This
   /// set must be reset between runs.
-  ValueSet MultiUserVals;
+  SetVector<Value*> MultiUserVals;
   /// Maps values in the tree to the vector lanes that uses them. This map must
   /// be reset between runs of getCost.
   std::map<Value*, int> LaneMap;
   /// A list of instructions to ignore while sinking
   /// memory instructions. This map must be reset between runs of getCost.
-  SmallPtrSet<Value *, 8> MemBarrierIgnoreList;
+  ValueSet MemBarrierIgnoreList;
 
   // -- Containers that are used during vectorizeTree -- //
 
@@ -155,6 +167,9 @@ private:
   /// Iterating over this list is faster than calling LICM.
   ValueList GatherInstructions;
 
+  /// Instruction builder to construct the vectorized tree.
+  IRBuilder<> Builder;
+
   // Analysis and block reference.
   BasicBlock *BB;
   ScalarEvolution *SE;

Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/crash_povray.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/crash_povray.ll?rev=182508&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/crash_povray.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/crash_povray.ll Wed May 22 14:47:32 2013
@@ -0,0 +1,34 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.hoge = type { double, double, double}
+
+define void @zot(%struct.hoge* %arg) {
+bb:
+  %tmp = load double* undef, align 8
+  %tmp1 = fsub double %tmp, undef
+  %tmp2 = load double* undef, align 8
+  %tmp3 = fsub double %tmp2, undef
+  %tmp4 = fmul double %tmp3, undef
+  %tmp5 = fmul double %tmp3, undef
+  %tmp6 = fsub double %tmp5, undef
+  %tmp7 = getelementptr inbounds %struct.hoge* %arg, i64 0, i32 1
+  store double %tmp6, double* %tmp7, align 8
+  %tmp8 = fmul double %tmp1, undef
+  %tmp9 = fsub double %tmp8, undef
+  %tmp10 = getelementptr inbounds %struct.hoge* %arg, i64 0, i32 2
+  store double %tmp9, double* %tmp10, align 8
+  br i1 undef, label %bb11, label %bb12
+
+bb11:                                             ; preds = %bb
+  br label %bb14
+
+bb12:                                             ; preds = %bb
+  %tmp13 = fmul double undef, %tmp2
+  br label %bb14
+
+bb14:                                             ; preds = %bb12, %bb11
+  ret void
+}

Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/in-tree-user.ll?rev=182508&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/in-tree-user.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/in-tree-user.ll Wed May 22 14:47:32 2013
@@ -0,0 +1,50 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+ at .str = private unnamed_addr constant [6 x i8] c"bingo\00", align 1
+
+; We can't vectorize when the roots are used inside the tree.
+;CHECK: @in_tree_user
+;CHECK-NOT: load <2 x double>
+;CHECK: ret
+define void @in_tree_user(double* nocapture %A, i32 %n) {
+entry:
+  %conv = sitofp i32 %n to double
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %0 = shl nsw i64 %indvars.iv, 1
+  %arrayidx = getelementptr inbounds double* %A, i64 %0
+  %1 = load double* %arrayidx, align 8
+  %mul1 = fmul double %conv, %1
+  %mul2 = fmul double %mul1, 7.000000e+00
+  %add = fadd double %mul2, 5.000000e+00
+  %BadValue = fadd double %add, %add    ; <------------------ In tree user.
+  %2 = or i64 %0, 1
+  %arrayidx6 = getelementptr inbounds double* %A, i64 %2
+  %3 = load double* %arrayidx6, align 8
+  %mul8 = fmul double %conv, %3
+  %mul9 = fmul double %mul8, 4.000000e+00
+  %add10 = fadd double %mul9, 9.000000e+00
+  %cmp11 = fcmp ogt double %add, %add10
+  br i1 %cmp11, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0))
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 100
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc
+  ret void
+}
+
+declare i32 @printf(i8* nocapture, ...)
+

Modified: llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll?rev=182508&r1=182507&r2=182508&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll (original)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/multi_user.ll Wed May 22 14:47:32 2013
@@ -12,8 +12,8 @@ target triple = "x86_64-apple-macosx10.7
 ;}
 
 ;CHECK: @foo
-;CHECK: insertelement <4 x i32>
 ;CHECK: load <4 x i32>
+;CHECK: insertelement <4 x i32>
 ;CHECK: add <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret

Added: llvm/trunk/test/Transforms/SLPVectorizer/X86/ordering.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SLPVectorizer/X86/ordering.ll?rev=182508&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SLPVectorizer/X86/ordering.ll (added)
+++ llvm/trunk/test/Transforms/SLPVectorizer/X86/ordering.ll Wed May 22 14:47:32 2013
@@ -0,0 +1,19 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define void @updateModelQPFrame(i32 %m_Bits) {
+entry:
+  %0 = load double* undef, align 8
+  %mul = fmul double undef, %0
+  %mul2 = fmul double undef, %mul
+  %mul4 = fmul double %0, %mul2
+  %mul5 = fmul double undef, 4.000000e+00
+  %mul7 = fmul double undef, %mul5
+  %conv = sitofp i32 %m_Bits to double
+  %mul8 = fmul double %conv, %mul7
+  %add = fadd double %mul4, %mul8
+  %cmp11 = fcmp olt double %add, 0.000000e+00
+  ret void
+}





More information about the llvm-commits mailing list