<div dir="ltr">Hi Gil,<div><br></div><div>Just to make sure you've seen the bug report (bugzilla isn't sending email right now, because of the spambots) - this caused/exposed PR30172.</div><div><br></div><div>Thanks,</div><div>  Michael</div></div><div class="gmail_extra"><br><div class="gmail_quote">On Wed, Aug 24, 2016 at 4:37 AM, Gil Rapaport via llvm-commits <span dir="ltr">&lt;<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: gilr<br>

Date: Wed Aug 24 06:37:57 2016<br>

New Revision: 279620<br>

<br>

URL: <a href="http://llvm.org/viewvc/llvm-project?rev=279620&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=279620&view=rev</a><br>

Log:<br>

[Loop Vectorizer] Support predication of div/rem<br>

<br>

div/rem instructions in basic blocks that require predication currently prevent<br>

vectorization. This patch extends the existing mechanism for predicating stores<br>

to handle other instructions and leverages it to predicate divs and rems.<br>

<br>

Differential Revision: <a href="https://reviews.llvm.org/D22918" rel="noreferrer" target="_blank">https://reviews.llvm.org/<wbr>D22918</a><br>

<br>

Added:<br>

    llvm/trunk/test/Transforms/<wbr>LoopVectorize/if-pred-non-<wbr>void.ll<br>

    llvm/trunk/test/Transforms/<wbr>LoopVectorize/if-pred-not-<wbr>when-safe.ll<br>

Modified:<br>

    llvm/trunk/lib/Transforms/<wbr>Vectorize/LoopVectorize.cpp<br>

    llvm/trunk/test/Transforms/<wbr>LoopVectorize/if-pred-stores.<wbr>ll<br>

<br>

Modified: llvm/trunk/lib/Transforms/<wbr>Vectorize/LoopVectorize.cpp<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=279620&r1=279619&r2=279620&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/<wbr>Transforms/Vectorize/<wbr>LoopVectorize.cpp?rev=279620&<wbr>r1=279619&r2=279620&view=diff</a><br>

==============================<wbr>==============================<wbr>==================<br>

--- llvm/trunk/lib/Transforms/<wbr>Vectorize/LoopVectorize.cpp (original)<br>

+++ llvm/trunk/lib/Transforms/<wbr>Vectorize/LoopVectorize.cpp Wed Aug 24 06:37:57 2016<br>

@@ -386,8 +386,9 @@ protected:<br>

   /// See PR14725.<br>

   void fixLCSSAPHIs();<br>

<br>

-  /// Predicate conditional stores on their respective conditions.<br>

-  void predicateStores();<br>

+  /// Predicate conditional instructions that require predication on their<br>

+  /// respective conditions.<br>

+  void predicateInstructions();<br>

<br>

   /// Shrinks vector element sizes based on information in "MinBWs".<br>

   void truncateToMinimalBitwidths();<br>

@@ -414,11 +415,11 @@ protected:<br>

   void updateAnalysis();<br>

<br>

   /// This instruction is un-vectorizable. Implement it as a sequence<br>

-  /// of scalars. If \p IfPredicateStore is true we need to 'hide' each<br>

+  /// of scalars. If \p IfPredicateInstr is true we need to 'hide' each<br>

   /// scalarized instruction behind an if block predicated on the control<br>

   /// dependence of the instruction.<br>

   virtual void scalarizeInstruction(<wbr>Instruction *Instr,<br>

-                                    bool IfPredicateStore = false);<br>

+                                    bool IfPredicateInstr = false);<br>

<br>

   /// Vectorize Load and Store instructions,<br>

   virtual void vectorizeMemoryInstruction(<wbr>Instruction *Instr);<br>

@@ -624,7 +625,7 @@ protected:<br>

<br>

   /// Store instructions that should be predicated, as a pair<br>

   ///   <StoreInst, Predicate><br>

-  SmallVector<std::pair<<wbr>StoreInst *, Value *>, 4> PredicatedStores;<br>

+  SmallVector<std::pair<<wbr>Instruction *, Value *>, 4> PredicatedInstructions;<br>

   EdgeMaskCache MaskCache;<br>

   /// Trip count of the original loop.<br>

   Value *TripCount;<br>

@@ -654,7 +655,7 @@ public:<br>

<br>

 private:<br>

   void scalarizeInstruction(<wbr>Instruction *Instr,<br>

-                            bool IfPredicateStore = false) override;<br>

+                            bool IfPredicateInstr = false) override;<br>

   void vectorizeMemoryInstruction(<wbr>Instruction *Instr) override;<br>

   Value *getBroadcastInstrs(Value *V) override;<br>

   Value *getStepVector(Value *Val, int StartIdx, Value *Step,<br>

@@ -2767,8 +2768,11 @@ void InnerLoopVectorizer::<wbr>vectorizeMemor<br>

 }<br>

<br>

 void InnerLoopVectorizer::<wbr>scalarizeInstruction(<wbr>Instruction *Instr,<br>

-                                               bool IfPredicateStore) {<br>

+                                               bool IfPredicateInstr) {<br>

   assert(!Instr->getType()-><wbr>isAggregateType() && "Can't handle vectors");<br>

+  DEBUG(dbgs() << "LV: Scalarizing"<br>

+               << (IfPredicateInstr ? " and predicating:" : ":") << *Instr<br>

+               << '\n');<br>

   // Holds vector parameters or scalars, in case of uniform vals.<br>

   SmallVector<VectorParts, 4> Params;<br>

<br>

@@ -2812,7 +2816,7 @@ void InnerLoopVectorizer::<wbr>scalarizeInstr<br>

   VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);<br>

<br>

   VectorParts Cond;<br>

-  if (IfPredicateStore) {<br>

+  if (IfPredicateInstr) {<br>

     assert(Instr->getParent()-><wbr>getSinglePredecessor() &&<br>

            "Only support single predecessor blocks");<br>

     Cond = createEdgeMask(Instr-><wbr>getParent()-><wbr>getSinglePredecessor(),<br>

@@ -2826,7 +2830,7 @@ void InnerLoopVectorizer::<wbr>scalarizeInstr<br>

<br>

       // Start if-block.<br>

       Value *Cmp = nullptr;<br>

-      if (IfPredicateStore) {<br>

+      if (IfPredicateInstr) {<br>

         Cmp = Builder.CreateExtractElement(<wbr>Cond[Part], Builder.getInt32(Width));<br>

         Cmp = Builder.CreateICmp(ICmpInst::<wbr>ICMP_EQ, Cmp,<br>

                                  ConstantInt::get(Cmp->getType(<wbr>), 1));<br>

@@ -2865,9 +2869,8 @@ void InnerLoopVectorizer::<wbr>scalarizeInstr<br>

         VecResults[Part] = Builder.CreateInsertElement(<wbr>VecResults[Part], Cloned,<br>

                                                        Builder.getInt32(Width));<br>

       // End if-block.<br>

-      if (IfPredicateStore)<br>

-        PredicatedStores.push_back(<br>

-            std::make_pair(cast<StoreInst><wbr>(Cloned), Cmp));<br>

+      if (IfPredicateInstr)<br>

+        PredicatedInstructions.push_<wbr>back(std::make_pair(Cloned, Cmp));<br>

     }<br>

   }<br>

 }<br>

@@ -3398,9 +3401,13 @@ static Value *addFastMathFlag(Value *V)<br>

   return V;<br>

 }<br>

<br>

-/// Estimate the overhead of scalarizing a value. Insert and Extract are set if<br>

-/// the result needs to be inserted and/or extracted from vectors.<br>

+/// \brief Estimate the overhead of scalarizing a value based on its type.<br>

+/// Insert and Extract are set if the result needs to be inserted and/or<br>

+/// extracted from vectors.<br>

+/// If the instruction is also to be predicated, add the cost of a PHI<br>

+/// node to the insertion cost.<br>

 static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract,<br>

+                                         bool Predicated,<br>

                                          const TargetTransformInfo &TTI) {<br>

   if (Ty->isVoidTy())<br>

     return 0;<br>

@@ -3409,15 +3416,58 @@ static unsigned getScalarizationOverhead<br>

   unsigned Cost = 0;<br>

<br>

   for (unsigned I = 0, E = Ty->getVectorNumElements(); I < E; ++I) {<br>

-    if (Insert)<br>

-      Cost += TTI.getVectorInstrCost(<wbr>Instruction::InsertElement, Ty, I);<br>

     if (Extract)<br>

       Cost += TTI.getVectorInstrCost(<wbr>Instruction::ExtractElement, Ty, I);<br>

+    if (Insert) {<br>

+      Cost += TTI.getVectorInstrCost(<wbr>Instruction::InsertElement, Ty, I);<br>

+      if (Predicated)<br>

+        Cost += TTI.getCFInstrCost(<wbr>Instruction::PHI);<br>

+    }<br>

   }<br>

<br>

+  // We assume that if-converted blocks have a 50% chance of being executed.<br>

+  // Predicated scalarized instructions are avoided due to the CF that bypasses<br>

+  // turned off lanes. The extracts and inserts will be sunk/hoisted to the<br>

+  // predicated basic-block and are subjected to the same assumption.<br>

+  if (Predicated)<br>

+    Cost /= 2;<br>

+<br>

   return Cost;<br>

 }<br>

<br>

+/// \brief Estimate the overhead of scalarizing an Instruction based on the<br>

+/// types of its operands and return value.<br>

+static unsigned getScalarizationOverhead(<wbr>SmallVectorImpl<Type *> &OpTys,<br>

+                                         Type *RetTy, bool Predicated,<br>

+                                         const TargetTransformInfo &TTI) {<br>

+  unsigned ScalarizationCost =<br>

+      getScalarizationOverhead(<wbr>RetTy, true, false, Predicated, TTI);<br>

+<br>

+  for (Type *Ty : OpTys)<br>

+    ScalarizationCost +=<br>

+        getScalarizationOverhead(Ty, false, true, Predicated, TTI);<br>

+<br>

+  return ScalarizationCost;<br>

+}<br>

+<br>

+/// \brief Estimate the overhead of scalarizing an instruction. This is a<br>

+/// convenience wrapper for the type-based getScalarizationOverhead API.<br>

+static unsigned getScalarizationOverhead(<wbr>Instruction *I, unsigned VF,<br>

+                                         bool Predicated,<br>

+                                         const TargetTransformInfo &TTI) {<br>

+  if (VF == 1)<br>

+    return 0;<br>

+<br>

+  Type *RetTy = ToVectorTy(I->getType(), VF);<br>

+<br>

+  SmallVector<Type *, 4> OpTys;<br>

+  unsigned OperandsNum = I->getNumOperands();<br>

+  for (unsigned OpInd = 0; OpInd < OperandsNum; ++OpInd)<br>

+    OpTys.push_back(ToVectorTy(I-><wbr>getOperand(OpInd)->getType(), VF));<br>

+<br>

+  return getScalarizationOverhead(<wbr>OpTys, RetTy, Predicated, TTI);<br>

+}<br>

+<br>

 // Estimate cost of a call instruction CI if it were vectorized with factor VF.<br>

 // Return the cost of the instruction, including scalarization overhead if it's<br>

 // needed. The flag NeedToScalarize shows if the call needs to be scalarized -<br>

@@ -3448,10 +3498,7 @@ static unsigned getVectorCallCost(CallIn<br>

<br>

   // Compute costs of unpacking argument values for the scalar calls and<br>

   // packing the return values to a vector.<br>

-  unsigned ScalarizationCost =<br>

-      getScalarizationOverhead(<wbr>RetTy, true, false, TTI);<br>

-  for (Type *Ty : Tys)<br>

-    ScalarizationCost += getScalarizationOverhead(Ty, false, true, TTI);<br>

+  unsigned ScalarizationCost = getScalarizationOverhead(Tys, RetTy, false, TTI);<br>

<br>

   unsigned Cost = ScalarCallCost * VF + ScalarizationCost;<br>

<br>

@@ -3871,7 +3918,7 @@ void InnerLoopVectorizer::<wbr>vectorizeLoop(<br>

   // Make sure DomTree is updated.<br>

   updateAnalysis();<br>

<br>

-  predicateStores();<br>

+  predicateInstructions();<br>

<br>

   // Remove redundant induction instructions.<br>

   cse(LoopVectorBody);<br>

@@ -4038,17 +4085,128 @@ void InnerLoopVectorizer::<wbr>fixLCSSAPHIs()<br>

                             LoopMiddleBlock);<br>

   }<br>

 }<br>

-<br>

-void InnerLoopVectorizer::<wbr>predicateStores() {<br>

-  for (auto KV : PredicatedStores) {<br>

+<br>

+void InnerLoopVectorizer::<wbr>predicateInstructions() {<br>

+<br>

+  // For each instruction I marked for predication on value C, split I into its<br>

+  // own basic block to form an if-then construct over C.<br>

+  // Since I may be fed by extractelement and/or be feeding an insertelement<br>

+  // generated during scalarization we try to move such instructions into the<br>

+  // predicated basic block as well. For the insertelement this also means that<br>

+  // the PHI will be created for the resulting vector rather than for the<br>

+  // scalar instruction.<br>

+  // So for some predicated instruction, e.g. the conditional sdiv in:<br>

+  //<br>

+  // for.body:<br>

+  //  ...<br>

+  //  %add = add nsw i32 %mul, %0<br>

+  //  %cmp5 = icmp sgt i32 %2, 7<br>

+  //  br i1 %cmp5, label %if.then, label %if.end<br>

+  //<br>

+  // if.then:<br>

+  //  %div = sdiv i32 %0, %1<br>

+  //  br label %if.end<br>

+  //<br>

+  // if.end:<br>

+  //  %x.0 = phi i32 [ %div, %if.then ], [ %add, %for.body ]<br>

+  //<br>

+  // the sdiv at this point is scalarized and if-converted using a select.<br>

+  // The inactive elements in the vector are not used, but the predicated<br>

+  // instruction is still executed for all vector elements, essentially:<br>

+  //<br>

+  // vector.body:<br>

+  //  ...<br>

+  //  %17 = add nsw <2 x i32> %16, %wide.load<br>

+  //  %29 = extractelement <2 x i32> %wide.load, i32 0<br>

+  //  %30 = extractelement <2 x i32> %wide.load51, i32 0<br>

+  //  %31 = sdiv i32 %29, %30<br>

+  //  %32 = insertelement <2 x i32> undef, i32 %31, i32 0<br>

+  //  %35 = extractelement <2 x i32> %wide.load, i32 1<br>

+  //  %36 = extractelement <2 x i32> %wide.load51, i32 1<br>

+  //  %37 = sdiv i32 %35, %36<br>

+  //  %38 = insertelement <2 x i32> %32, i32 %37, i32 1<br>

+  //  %predphi = select <2 x i1> %26, <2 x i32> %38, <2 x i32> %17<br>

+  //<br>

+  // Predication will now re-introduce the original control flow to avoid false<br>

+  // side-effects by the sdiv instructions on the inactive elements, yielding<br>

+  // (after cleanup):<br>

+  //<br>

+  // vector.body:<br>

+  //  ...<br>

+  //  %5 = add nsw <2 x i32> %4, %wide.load<br>

+  //  %8 = icmp sgt <2 x i32> %wide.load52, <i32 7, i32 7><br>

+  //  %9 = extractelement <2 x i1> %8, i32 0<br>

+  //  br i1 %9, label %pred.sdiv.if, label %pred.sdiv.continue<br>

+  //<br>

+  // pred.sdiv.if:<br>

+  //  %10 = extractelement <2 x i32> %wide.load, i32 0<br>

+  //  %11 = extractelement <2 x i32> %wide.load51, i32 0<br>

+  //  %12 = sdiv i32 %10, %11<br>

+  //  %13 = insertelement <2 x i32> undef, i32 %12, i32 0<br>

+  //  br label %pred.sdiv.continue<br>

+  //<br>

+  // pred.sdiv.continue:<br>

+  //  %14 = phi <2 x i32> [ undef, %vector.body ], [ %13, %pred.sdiv.if ]<br>

+  //  %15 = extractelement <2 x i1> %8, i32 1<br>

+  //  br i1 %15, label %pred.sdiv.if54, label %pred.sdiv.continue55<br>

+  //<br>

+  // pred.sdiv.if54:<br>

+  //  %16 = extractelement <2 x i32> %wide.load, i32 1<br>

+  //  %17 = extractelement <2 x i32> %wide.load51, i32 1<br>

+  //  %18 = sdiv i32 %16, %17<br>

+  //  %19 = insertelement <2 x i32> %14, i32 %18, i32 1<br>

+  //  br label %pred.sdiv.continue55<br>

+  //<br>

+  // pred.sdiv.continue55:<br>

+  //  %20 = phi <2 x i32> [ %14, %pred.sdiv.continue ], [ %19, %pred.sdiv.if54 ]<br>

+  //  %predphi = select <2 x i1> %8, <2 x i32> %20, <2 x i32> %5<br>

+<br>

+  for (auto KV : PredicatedInstructions) {<br>

     BasicBlock::iterator I(KV.first);<br>

-    auto *BB = SplitBlock(I->getParent(), &*std::next(I), DT, LI);<br>

+    BasicBlock *Head = I->getParent();<br>

+    auto *BB = SplitBlock(Head, &*std::next(I), DT, LI);<br>

     auto *T = SplitBlockAndInsertIfThen(KV.<wbr>second, &*I, /*Unreachable=*/false,<br>

                                         /*BranchWeights=*/nullptr, DT, LI);<br>

     I->moveBefore(T);<br>

-    I->getParent()->setName("pred.<wbr>store.if");<br>

-    BB->setName("pred.store.<wbr>continue");<br>

+    // Try to move any extractelement we may have created for the predicated<br>

+    // instruction into the Then block.<br>

+    for (Use &Op : I->operands()) {<br>

+      auto *OpInst = dyn_cast<ExtractElementInst>(&<wbr>*Op);<br>

+      if (OpInst && OpInst->hasOneUse()) // TODO: more accurately - hasOneUser()<br>

+        OpInst->moveBefore(&*I);<br>

+    }<br>

+<br>

+    I->getParent()->setName(Twine(<wbr>"pred.") + I->getOpcodeName() + ".if");<br>

+    BB->setName(Twine("pred.") + I->getOpcodeName() + ".continue");<br>

+<br>

+    // If the instruction is non-void create a Phi node at reconvergence point.<br>

+    if (!I->getType()->isVoidTy()) {<br>

+      Value *IncomingTrue = nullptr;<br>

+      Value *IncomingFalse = nullptr;<br>

+<br>

+      if (I->hasOneUse() && isa<InsertElementInst>(*I-><wbr>user_begin())) {<br>

+        // If the predicated instruction is feeding an insert-element, move it<br>

+        // into the Then block; Phi node will be created for the vector.<br>

+        InsertElementInst *IEI = cast<InsertElementInst>(*I-><wbr>user_begin());<br>

+        IEI->moveBefore(T);<br>

+        IncomingTrue = IEI; // the new vector with the inserted element.<br>

+        IncomingFalse = IEI->getOperand(0); // the unmodified vector<br>

+      } else {<br>

+        // Phi node will be created for the scalar predicated instruction.<br>

+        IncomingTrue = &*I;<br>

+        IncomingFalse = UndefValue::get(I->getType());<br>

+      }<br>

+<br>

+      BasicBlock *PostDom = I->getParent()-><wbr>getSingleSuccessor();<br>

+      assert(PostDom && "Then block has multiple successors");<br>

+      PHINode *Phi =<br>

+          PHINode::Create(IncomingTrue-><wbr>getType(), 2, "", &PostDom->front());<br>

+      IncomingTrue-><wbr>replaceAllUsesWith(Phi);<br>

+      Phi->addIncoming(<wbr>IncomingFalse, Head);<br>

+      Phi->addIncoming(IncomingTrue, I->getParent());<br>

+    }<br>

   }<br>

+<br>

   DEBUG(DT->verifyDomTree());<br>

 }<br>

<br>

@@ -4235,6 +4393,24 @@ void InnerLoopVectorizer::<wbr>widenPHIInstru<br>

   }<br>

 }<br>

<br>

+/// A helper function for checking whether an integer division-related<br>

+/// instruction may divide by zero (in which case it must be predicated if<br>

+/// executed conditionally in the scalar code).<br>

+/// TODO: It may be worthwhile to generalize and check isKnownNonZero().<br>

+/// Non-zero divisors that are non compile-time constants will not be<br>

+/// converted into multiplication, so we will still end up scalarizing<br>

+/// the division, but can do so w/o predication.<br>

+static bool mayDivideByZero(Instruction &I) {<br>

+  assert((I.getOpcode() == Instruction::UDiv ||<br>

+          I.getOpcode() == Instruction::SDiv ||<br>

+          I.getOpcode() == Instruction::URem ||<br>

+          I.getOpcode() == Instruction::SRem) &&<br>

+         "Unexpected instruction");<br>

+  Value *Divisor = I.getOperand(1);<br>

+  auto *CInt = dyn_cast<ConstantInt>(Divisor)<wbr>;<br>

+  return !CInt || CInt->isZero();<br>

+}<br>

+<br>

 void InnerLoopVectorizer::<wbr>vectorizeBlockInLoop(<wbr>BasicBlock *BB, PhiVector *PV) {<br>

   // For each instruction in the old loop.<br>

   for (Instruction &I : *BB) {<br>

@@ -4251,17 +4427,23 @@ void InnerLoopVectorizer::<wbr>vectorizeBlock<br>

       continue;<br>

     } // End of PHI.<br>

<br>

+    case Instruction::UDiv:<br>

+    case Instruction::SDiv:<br>

+    case Instruction::SRem:<br>

+    case Instruction::URem:<br>

+      // Scalarize with predication if this instruction may divide by zero and<br>

+      // block execution is conditional, otherwise fallthrough.<br>

+      if (mayDivideByZero(I) && Legal->blockNeedsPredication(<wbr>I.getParent())) {<br>

+        scalarizeInstruction(&I, true);<br>

+        continue;<br>

+      }<br>

     case Instruction::Add:<br>

     case Instruction::FAdd:<br>

     case Instruction::Sub:<br>

     case Instruction::FSub:<br>

     case Instruction::Mul:<br>

     case Instruction::FMul:<br>

-    case Instruction::UDiv:<br>

-    case Instruction::SDiv:<br>

     case Instruction::FDiv:<br>

-    case Instruction::URem:<br>

-    case Instruction::SRem:<br>

     case Instruction::FRem:<br>

     case Instruction::Shl:<br>

     case Instruction::LShr:<br>

@@ -5155,17 +5337,6 @@ bool LoopVectorizationLegality::<wbr>blockCan<br>

     }<br>

     if (I.mayThrow())<br>

       return false;<br>

-<br>

-    // The instructions below can trap.<br>

-    switch (I.getOpcode()) {<br>

-    default:<br>

-      continue;<br>

-    case Instruction::UDiv:<br>

-    case Instruction::SDiv:<br>

-    case Instruction::URem:<br>

-    case Instruction::SRem:<br>

-      return false;<br>

-    }<br>

   }<br>

<br>

   return true;<br>

@@ -6082,17 +6253,24 @@ unsigned LoopVectorizationCostModel::<wbr>get<br>

     // TODO: IF-converted IFs become selects.<br>

     return 0;<br>

   }<br>

+  case Instruction::UDiv:<br>

+  case Instruction::SDiv:<br>

+  case Instruction::URem:<br>

+  case Instruction::SRem:<br>

+    // We assume that if-converted blocks have a 50% chance of being executed.<br>

+    // Predicated scalarized instructions are avoided due to the CF that<br>

+    // bypasses turned off lanes. If we are not predicating, fallthrough.<br>

+    if (VF > 1 && mayDivideByZero(*I) &&<br>

+        Legal->blockNeedsPredication(<wbr>I->getParent()))<br>

+      return VF * TTI.getArithmeticInstrCost(I-><wbr>getOpcode(), RetTy) / 2 +<br>

+             getScalarizationOverhead(I, VF, true, TTI);<br>

   case Instruction::Add:<br>

   case Instruction::FAdd:<br>

   case Instruction::Sub:<br>

   case Instruction::FSub:<br>

   case Instruction::Mul:<br>

   case Instruction::FMul:<br>

-  case Instruction::UDiv:<br>

-  case Instruction::SDiv:<br>

   case Instruction::FDiv:<br>

-  case Instruction::URem:<br>

-  case Instruction::SRem:<br>

   case Instruction::FRem:<br>

   case Instruction::Shl:<br>

   case Instruction::LShr:<br>

@@ -6328,28 +6506,11 @@ unsigned LoopVectorizationCostModel::<wbr>get<br>

       return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI));<br>

     return CallCost;<br>

   }<br>

-  default: {<br>

-    // We are scalarizing the instruction. Return the cost of the scalar<br>

-    // instruction, plus the cost of insert and extract into vector<br>

-    // elements, times the vector width.<br>

-    unsigned Cost = 0;<br>

-<br>

-    if (!RetTy->isVoidTy() && VF != 1) {<br>

-      unsigned InsCost =<br>

-          TTI.getVectorInstrCost(<wbr>Instruction::InsertElement, VectorTy);<br>

-      unsigned ExtCost =<br>

-          TTI.getVectorInstrCost(<wbr>Instruction::ExtractElement, VectorTy);<br>

-<br>

-      // The cost of inserting the results plus extracting each one of the<br>

-      // operands.<br>

-      Cost += VF * (InsCost + ExtCost * I->getNumOperands());<br>

-    }<br>

-<br>

+  default:<br>

     // The cost of executing VF copies of the scalar instruction. This opcode<br>

     // is unknown. Assume that it is the same as 'mul'.<br>

-    Cost += VF * TTI.getArithmeticInstrCost(<wbr>Instruction::Mul, VectorTy);<br>

-    return Cost;<br>

-  }<br>

+    return VF * TTI.getArithmeticInstrCost(<wbr>Instruction::Mul, VectorTy) +<br>

+           getScalarizationOverhead(I, VF, false, TTI);<br>

   } // end of switch.<br>

 }<br>

<br>

@@ -6407,7 +6568,7 @@ void LoopVectorizationCostModel::<wbr>collect<br>

 }<br>

<br>

 void InnerLoopUnroller::<wbr>scalarizeInstruction(<wbr>Instruction *Instr,<br>

-                                             bool IfPredicateStore) {<br>

+                                             bool IfPredicateInstr) {<br>

   assert(!Instr->getType()-><wbr>isAggregateType() && "Can't handle vectors");<br>

   // Holds vector parameters or scalars, in case of uniform vals.<br>

   SmallVector<VectorParts, 4> Params;<br>

@@ -6450,7 +6611,7 @@ void InnerLoopUnroller::<wbr>scalarizeInstruc<br>

   VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);<br>

<br>

   VectorParts Cond;<br>

-  if (IfPredicateStore) {<br>

+  if (IfPredicateInstr) {<br>

     assert(Instr->getParent()-><wbr>getSinglePredecessor() &&<br>

            "Only support single predecessor blocks");<br>

     Cond = createEdgeMask(Instr-><wbr>getParent()-><wbr>getSinglePredecessor(),<br>

@@ -6463,7 +6624,7 @@ void InnerLoopUnroller::<wbr>scalarizeInstruc<br>

<br>

     // Start an "if (pred) a[i] = ..." block.<br>

     Value *Cmp = nullptr;<br>

-    if (IfPredicateStore) {<br>

+    if (IfPredicateInstr) {<br>

       if (Cond[Part]->getType()-><wbr>isVectorTy())<br>

         Cond[Part] =<br>

             Builder.CreateExtractElement(<wbr>Cond[Part], Builder.getInt32(0));<br>

@@ -6494,16 +6655,16 @@ void InnerLoopUnroller::<wbr>scalarizeInstruc<br>

       VecResults[Part] = Cloned;<br>

<br>

     // End if-block.<br>

-    if (IfPredicateStore)<br>

-      PredicatedStores.push_back(<wbr>std::make_pair(cast<StoreInst><wbr>(Cloned), Cmp));<br>

+    if (IfPredicateInstr)<br>

+      PredicatedInstructions.push_<wbr>back(std::make_pair(Cloned, Cmp));<br>

   }<br>

 }<br>

<br>

 void InnerLoopUnroller::<wbr>vectorizeMemoryInstruction(<wbr>Instruction *Instr) {<br>

   auto *SI = dyn_cast<StoreInst>(Instr);<br>

-  bool IfPredicateStore = (SI && Legal->blockNeedsPredication(<wbr>SI->getParent()));<br>

+  bool IfPredicateInstr = (SI && Legal->blockNeedsPredication(<wbr>SI->getParent()));<br>

<br>

-  return scalarizeInstruction(Instr, IfPredicateStore);<br>

+  return scalarizeInstruction(Instr, IfPredicateInstr);<br>

 }<br>

<br>

 Value *InnerLoopUnroller::<wbr>reverseVector(Value *Vec) { return Vec; }<br>

<br>

Added: llvm/trunk/test/Transforms/<wbr>LoopVectorize/if-pred-non-<wbr>void.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/if-pred-non-void.ll?rev=279620&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>Transforms/LoopVectorize/if-<wbr>pred-non-void.ll?rev=279620&<wbr>view=auto</a><br>

==============================<wbr>==============================<wbr>==================<br>

--- llvm/trunk/test/Transforms/<wbr>LoopVectorize/if-pred-non-<wbr>void.ll (added)<br>

+++ llvm/trunk/test/Transforms/<wbr>LoopVectorize/if-pred-non-<wbr>void.ll Wed Aug 24 06:37:57 2016<br>

@@ -0,0 +1,173 @@<br>

+; RUN: opt -S -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -verify-loop-info -simplifycfg < %s | FileCheck %s<br>

+<br>

+target datalayout = "e-m:e-i64:64-f80:128-n8:16:<wbr>32:64-S128"<br>

+target triple = "x86_64-unknown-linux-gnu"<br>

+<br>

+; Test predication of non-void instructions, specifically (i) that these<br>

+; instructions permit vectorization and (ii) the creation of an insertelement<br>

+; and a Phi node. We check the full 2-element sequence for the first<br>

+; instruction; For the rest we'll just make sure they get predicated based<br>

+; on the code generated for the first element.<br>

+define void @test(i32* nocapture %asd, i32* nocapture %aud,<br>

+                  i32* nocapture %asr, i32* nocapture %aur) {<br>

+entry:<br>

+  br label %for.body<br>

+<br>

+for.cond.cleanup:                                 ; preds = %if.end<br>

+  ret void<br>

+<br>

+; CHECK-LABEL: test<br>

+; CHECK: vector.body:<br>

+; CHECK:   %[[SDEE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0<br>

+; CHECK:   %[[SDCC:[a-zA-Z0-9]+]] = icmp eq i1 %[[SDEE]], true<br>

+; CHECK:   br i1 %[[SDCC]], label %[[CSD:[a-zA-Z0-9.]+]], label %[[ESD:[a-zA-Z0-9.]+]]<br>

+; CHECK: [[CSD]]:<br>

+; CHECK:   %[[SDA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0<br>

+; CHECK:   %[[SDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0<br>

+; CHECK:   %[[SD0:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0]], %[[SDA1]]<br>

+; CHECK:   %[[SD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32 %[[SD0]], i32 0<br>

+; CHECK:   br label %[[ESD]]<br>

+; CHECK: [[ESD]]:<br>

+; CHECK:   %[[SDR:[a-zA-Z0-9]+]] = phi <2 x i32> [ undef, %vector.body ], [ %[[SD1]], %[[CSD]] ]<br>

+; CHECK:   %[[SDEEH:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 1<br>

+; CHECK:   %[[SDCCH:[a-zA-Z0-9]+]] = icmp eq i1 %[[SDEEH]], true<br>

+; CHECK:   br i1 %[[SDCCH]], label %[[CSDH:[a-zA-Z0-9.]+]], label %[[ESDH:[a-zA-Z0-9.]+]]<br>

+; CHECK: [[CSDH]]:<br>

+; CHECK:   %[[SDA0H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1<br>

+; CHECK:   %[[SDA1H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 1<br>

+; CHECK:   %[[SD0H:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0H]], %[[SDA1H]]<br>

+; CHECK:   %[[SD1H:[a-zA-Z0-9]+]] = insertelement <2 x i32> %[[SDR]], i32 %[[SD0H]], i32 1<br>

+; CHECK:   br label %[[ESDH]]<br>

+; CHECK: [[ESDH]]:<br>

+; CHECK:   %{{.*}} = phi <2 x i32> [ %[[SDR]], %[[ESD]] ], [ %[[SD1H]], %[[CSDH]] ]<br>

+<br>

+; CHECK:   %[[UDEE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0<br>

+; CHECK:   %[[UDCC:[a-zA-Z0-9]+]] = icmp eq i1 %[[UDEE]], true<br>

+; CHECK:   br i1 %[[UDCC]], label %[[CUD:[a-zA-Z0-9.]+]], label %[[EUD:[a-zA-Z0-9.]+]]<br>

+; CHECK: [[CUD]]:<br>

+; CHECK:   %[[UDA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0<br>

+; CHECK:   %[[UDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0<br>

+; CHECK:   %[[UD0:[a-zA-Z0-9]+]] = udiv i32 %[[UDA0]], %[[UDA1]]<br>

+; CHECK:   %[[UD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32 %[[UD0]], i32 0<br>

+; CHECK:   br label %[[EUD]]<br>

+; CHECK: [[EUD]]:<br>

+; CHECK:   %{{.*}} = phi <2 x i32> [ undef, %{{.*}} ], [ %[[UD1]], %[[CUD]] ]<br>

+<br>

+; CHECK:   %[[SREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0<br>

+; CHECK:   %[[SRCC:[a-zA-Z0-9]+]] = icmp eq i1 %[[SREE]], true<br>

+; CHECK:   br i1 %[[SRCC]], label %[[CSR:[a-zA-Z0-9.]+]], label %[[ESR:[a-zA-Z0-9.]+]]<br>

+; CHECK: [[CSR]]:<br>

+; CHECK:   %[[SRA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0<br>

+; CHECK:   %[[SRA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0<br>

+; CHECK:   %[[SR0:[a-zA-Z0-9]+]] = srem i32 %[[SRA0]], %[[SRA1]]<br>

+; CHECK:   %[[SR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32 %[[SR0]], i32 0<br>

+; CHECK:   br label %[[ESR]]<br>

+; CHECK: [[ESR]]:<br>

+; CHECK:   %{{.*}} = phi <2 x i32> [ undef, %{{.*}} ], [ %[[SR1]], %[[CSR]] ]<br>

+<br>

+; CHECK:   %[[UREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0<br>

+; CHECK:   %[[URCC:[a-zA-Z0-9]+]] = icmp eq i1 %[[UREE]], true<br>

+; CHECK:   br i1 %[[URCC]], label %[[CUR:[a-zA-Z0-9.]+]], label %[[EUR:[a-zA-Z0-9.]+]]<br>

+; CHECK: [[CUR]]:<br>

+; CHECK:   %[[URA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0<br>

+; CHECK:   %[[URA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32 0<br>

+; CHECK:   %[[UR0:[a-zA-Z0-9]+]] = urem i32 %[[URA0]], %[[URA1]]<br>

+; CHECK:   %[[UR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32 %[[UR0]], i32 0<br>

+; CHECK:   br label %[[EUR]]<br>

+; CHECK: [[EUR]]:<br>

+; CHECK:   %{{.*}} = phi <2 x i32> [ undef, %{{.*}} ], [ %[[UR1]], %[[CUR]] ]<br>

+<br>

+for.body:                                         ; preds = %if.end, %entry<br>

+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]<br>

+  %isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv<br>

+  %iud = getelementptr inbounds i32, i32* %aud, i64 %indvars.iv<br>

+  %isr = getelementptr inbounds i32, i32* %asr, i64 %indvars.iv<br>

+  %iur = getelementptr inbounds i32, i32* %aur, i64 %indvars.iv<br>

+  %lsd = load i32, i32* %isd, align 4<br>

+  %lud = load i32, i32* %iud, align 4<br>

+  %lsr = load i32, i32* %isr, align 4<br>

+  %lur = load i32, i32* %iur, align 4<br>

+  %psd = add nsw i32 %lsd, 23<br>

+  %pud = add nsw i32 %lud, 24<br>

+  %psr = add nsw i32 %lsr, 25<br>

+  %pur = add nsw i32 %lur, 26<br>

+  %cmp1 = icmp slt i32 %lsd, 100<br>

+  br i1 %cmp1, label %if.then, label %if.end<br>

+<br>

+if.then:                                          ; preds = %for.body<br>

+  %rsd = sdiv i32 %psd, %lsd<br>

+  %rud = udiv i32 %pud, %lud<br>

+  %rsr = srem i32 %psr, %lsr<br>

+  %rur = urem i32 %pur, %lur<br>

+  br label %if.end<br>

+<br>

+if.end:                                           ; preds = %if.then, %for.body<br>

+  %ysd.0 = phi i32 [ %rsd, %if.then ], [ %psd, %for.body ]<br>

+  %yud.0 = phi i32 [ %rud, %if.then ], [ %pud, %for.body ]<br>

+  %ysr.0 = phi i32 [ %rsr, %if.then ], [ %psr, %for.body ]<br>

+  %yur.0 = phi i32 [ %rur, %if.then ], [ %pur, %for.body ]<br>

+  store i32 %ysd.0, i32* %isd, align 4<br>

+  store i32 %yud.0, i32* %iud, align 4<br>

+  store i32 %ysr.0, i32* %isr, align 4<br>

+  store i32 %yur.0, i32* %iur, align 4<br>

+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1<br>

+  %exitcond = icmp eq i64 %indvars.iv.next, 128<br>

+  br i1 %exitcond, label %for.cond.cleanup, label %for.body<br>

+}<br>

+<br>

+; Future-use test for predication under smarter scalar-scalar: this test will<br>

+; fail when the vectorizer starts feeding scalarized values directly to their<br>

+; scalar users, i.e. w/o generating redundant insertelement/extractelement<br>

+; instructions. This case is already supported by the predication code (which<br>

+; should generate a phi for the scalar predicated value rather than for the<br>

+; insertelement), but cannot be tested yet.<br>

+; If you got this test to fail, kindly fix the test by using the alternative<br>

+; FFU sequence. This will make the test check how we handle this case from<br>

+; now on.<br>

+define void @test_scalar2scalar(i32* nocapture %asd, i32* nocapture %bsd) {<br>

+entry:<br>

+  br label %for.body<br>

+<br>

+for.cond.cleanup:                                 ; preds = %if.end<br>

+  ret void<br>

+<br>

+; CHECK-LABEL: test_scalar2scalar<br>

+; CHECK: vector.body:<br>

+; CHECK:   br i1 %{{.*}}, label %[[THEN:[a-zA-Z0-9.]+]], label %[[FI:[a-zA-Z0-9.]+]]<br>

+; CHECK: [[THEN]]:<br>

+; CHECK:   %[[PD:[a-zA-Z0-9]+]] = sdiv i32 %{{.*}}, %{{.*}}<br>

+; CHECK:   %[[PDV:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32 %[[PD]], i32 0<br>

+; CHECK:   br label %[[FI]]<br>

+; CHECK: [[FI]]:<br>

+; CHECK:   %[[PH:[a-zA-Z0-9]+]] = phi <2 x i32> [ undef, %vector.body ], [ %[[PDV]], %[[THEN]] ]<br>

+; FFU-LABEL: test_scalar2scalar<br>

+; FFU:   vector.body:<br>

+; FFU:     br i1 %{{.*}}, label %[[THEN:[a-zA-Z0-9.]+]], label %[[FI:[a-zA-Z0-9.]+]]<br>

+; FFU:   [[THEN]]:<br>

+; FFU:     %[[PD:[a-zA-Z0-9]+]] = sdiv i32 %{{.*}}, %{{.*}}<br>

+; FFU:     br label %[[FI]]<br>

+; FFU:   [[FI]]:<br>

+; FFU:     %{{.*}} = phi i32 [ undef, %vector.body ], [ %[[PD]], %[[THEN]] ]<br>

+<br>

+for.body:                                         ; preds = %if.end, %entry<br>

+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]<br>

+  %isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv<br>

+  %lsd = load i32, i32* %isd, align 4<br>

+  %isd.b = getelementptr inbounds i32, i32* %bsd, i64 %indvars.iv<br>

+  %lsd.b = load i32, i32* %isd.b, align 4<br>

+  %psd = add nsw i32 %lsd, 23<br>

+  %cmp1 = icmp slt i32 %lsd, 100<br>

+  br i1 %cmp1, label %if.then, label %if.end<br>

+<br>

+if.then:                                          ; preds = %for.body<br>

+  %sd1 = sdiv i32 %psd, %lsd<br>

+  %rsd = sdiv i32 %lsd.b, %sd1<br>

+  br label %if.end<br>

+<br>

+if.end:                                           ; preds = %if.then, %for.body<br>

+  %ysd.0 = phi i32 [ %rsd, %if.then ], [ %psd, %for.body ]<br>

+  store i32 %ysd.0, i32* %isd, align 4<br>

+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1<br>

+  %exitcond = icmp eq i64 %indvars.iv.next, 128<br>

+  br i1 %exitcond, label %for.cond.cleanup, label %for.body<br>

+}<br>

<br>

Added: llvm/trunk/test/Transforms/<wbr>LoopVectorize/if-pred-not-<wbr>when-safe.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll?rev=279620&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>Transforms/LoopVectorize/if-<wbr>pred-not-when-safe.ll?rev=<wbr>279620&view=auto</a><br>

==============================<wbr>==============================<wbr>==================<br>

--- llvm/trunk/test/Transforms/<wbr>LoopVectorize/if-pred-not-<wbr>when-safe.ll (added)<br>

+++ llvm/trunk/test/Transforms/<wbr>LoopVectorize/if-pred-not-<wbr>when-safe.ll Wed Aug 24 06:37:57 2016<br>

@@ -0,0 +1,90 @@<br>

+; RUN: opt -S -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -verify-loop-info -simplifycfg < %s | FileCheck %s<br>

+<br>

+target datalayout = "e-m:e-i64:64-f80:128-n8:16:<wbr>32:64-S128"<br>

+target triple = "x86_64-unknown-linux-gnu"<br>

+<br>

+; Test no-predication of instructions that are provably safe, e.g. dividing by<br>

+; a non-zero constant.<br>

+define void @test(i32* nocapture %asd, i32* nocapture %aud,<br>

+                  i32* nocapture %asr, i32* nocapture %aur,<br>

+                  i32* nocapture %asd0, i32* nocapture %aud0,<br>

+                  i32* nocapture %asr0, i32* nocapture %aur0<br>

+) {<br>

+entry:<br>

+  br label %for.body<br>

+<br>

+for.cond.cleanup:                                 ; preds = %if.end<br>

+  ret void<br>

+<br>

+; CHECK-LABEL: test<br>

+; CHECK: vector.body:<br>

+; CHECK: %{{.*}} = sdiv <2 x i32> %{{.*}}, <i32 11, i32 11><br>

+; CHECK: %{{.*}} = udiv <2 x i32> %{{.*}}, <i32 13, i32 13><br>

+; CHECK: %{{.*}} = srem <2 x i32> %{{.*}}, <i32 17, i32 17><br>

+; CHECK: %{{.*}} = urem <2 x i32> %{{.*}}, <i32 19, i32 19><br>

+; CHECK-NOT: %{{.*}} = sdiv <2 x i32> %{{.*}}, <i32 0, i32 0><br>

+; CHECK-NOT: %{{.*}} = udiv <2 x i32> %{{.*}}, <i32 0, i32 0><br>

+; CHECK-NOT: %{{.*}} = srem <2 x i32> %{{.*}}, <i32 0, i32 0><br>

+; CHECK-NOT: %{{.*}} = urem <2 x i32> %{{.*}}, <i32 0, i32 0><br>

+<br>

+for.body:                                         ; preds = %if.end, %entry<br>

+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]<br>

+  %isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv<br>

+  %iud = getelementptr inbounds i32, i32* %aud, i64 %indvars.iv<br>

+  %isr = getelementptr inbounds i32, i32* %asr, i64 %indvars.iv<br>

+  %iur = getelementptr inbounds i32, i32* %aur, i64 %indvars.iv<br>

+  %lsd = load i32, i32* %isd, align 4<br>

+  %lud = load i32, i32* %iud, align 4<br>

+  %lsr = load i32, i32* %isr, align 4<br>

+  %lur = load i32, i32* %iur, align 4<br>

+  %psd = add nsw i32 %lsd, 23<br>

+  %pud = add nsw i32 %lud, 24<br>

+  %psr = add nsw i32 %lsr, 25<br>

+  %pur = add nsw i32 %lur, 26<br>

+  %isd0 = getelementptr inbounds i32, i32* %asd0, i64 %indvars.iv<br>

+  %iud0 = getelementptr inbounds i32, i32* %aud0, i64 %indvars.iv<br>

+  %isr0 = getelementptr inbounds i32, i32* %asr0, i64 %indvars.iv<br>

+  %iur0 = getelementptr inbounds i32, i32* %aur0, i64 %indvars.iv<br>

+  %lsd0 = load i32, i32* %isd0, align 4<br>

+  %lud0 = load i32, i32* %iud0, align 4<br>

+  %lsr0 = load i32, i32* %isr0, align 4<br>

+  %lur0 = load i32, i32* %iur0, align 4<br>

+  %psd0 = add nsw i32 %lsd, 27<br>

+  %pud0 = add nsw i32 %lud, 28<br>

+  %psr0 = add nsw i32 %lsr, 29<br>

+  %pur0 = add nsw i32 %lur, 30<br>

+  %cmp1 = icmp slt i32 %lsd, 100<br>

+  br i1 %cmp1, label %if.then, label %if.end<br>

+<br>

+if.then:                                          ; preds = %for.body<br>

+  %rsd = sdiv i32 %psd, 11<br>

+  %rud = udiv i32 %pud, 13<br>

+  %rsr = srem i32 %psr, 17<br>

+  %rur = urem i32 %pur, 19<br>

+  %rsd0 = sdiv i32 %psd0, 0<br>

+  %rud0 = udiv i32 %pud0, 0<br>

+  %rsr0 = srem i32 %psr0, 0<br>

+  %rur0 = urem i32 %pur0, 0<br>

+  br label %if.end<br>

+<br>

+if.end:                                           ; preds = %if.then, %for.body<br>

+  %ysd.0 = phi i32 [ %rsd, %if.then ], [ %psd, %for.body ]<br>

+  %yud.0 = phi i32 [ %rud, %if.then ], [ %pud, %for.body ]<br>

+  %ysr.0 = phi i32 [ %rsr, %if.then ], [ %psr, %for.body ]<br>

+  %yur.0 = phi i32 [ %rur, %if.then ], [ %pur, %for.body ]<br>

+  %ysd0.0 = phi i32 [ %rsd0, %if.then ], [ %psd0, %for.body ]<br>

+  %yud0.0 = phi i32 [ %rud0, %if.then ], [ %pud0, %for.body ]<br>

+  %ysr0.0 = phi i32 [ %rsr0, %if.then ], [ %psr0, %for.body ]<br>

+  %yur0.0 = phi i32 [ %rur0, %if.then ], [ %pur0, %for.body ]<br>

+  store i32 %ysd.0, i32* %isd, align 4<br>

+  store i32 %yud.0, i32* %iud, align 4<br>

+  store i32 %ysr.0, i32* %isr, align 4<br>

+  store i32 %yur.0, i32* %iur, align 4<br>

+  store i32 %ysd0.0, i32* %isd0, align 4<br>

+  store i32 %yud0.0, i32* %iud0, align 4<br>

+  store i32 %ysr0.0, i32* %isr0, align 4<br>

+  store i32 %yur0.0, i32* %iur0, align 4<br>

+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1<br>

+  %exitcond = icmp eq i64 %indvars.iv.next, 128<br>

+  br i1 %exitcond, label %for.cond.cleanup, label %for.body<br>

+}<br>

<br>

Modified: llvm/trunk/test/Transforms/<wbr>LoopVectorize/if-pred-stores.<wbr>ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/if-pred-stores.ll?rev=279620&r1=279619&r2=279620&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>Transforms/LoopVectorize/if-<wbr>pred-stores.ll?rev=279620&r1=<wbr>279619&r2=279620&view=diff</a><br>

==============================<wbr>==============================<wbr>==================<br>

--- llvm/trunk/test/Transforms/<wbr>LoopVectorize/if-pred-stores.<wbr>ll (original)<br>

+++ llvm/trunk/test/Transforms/<wbr>LoopVectorize/if-pred-stores.<wbr>ll Wed Aug 24 06:37:57 2016<br>

@@ -1,7 +1,6 @@<br>

 ; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -verify-loop-info -simplifycfg < %s | FileCheck %s --check-prefix=UNROLL<br>

 ; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -verify-loop-info < %s | FileCheck %s --check-prefix=UNROLL-<wbr>NOSIMPLIFY<br>

 ; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -verify-loop-info -simplifycfg < %s | FileCheck %s --check-prefix=VEC<br>

-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -verify-loop-info -simplifycfg -instcombine < %s | FileCheck %s --check-prefix=VEC-IC<br>

<br>

 target datalayout = "e-m:o-i64:64-f80:128-n8:16:<wbr>32:64-S128"<br>

 target triple = "x86_64-apple-macosx10.9.0"<br>

@@ -17,49 +16,27 @@ entry:<br>

 ; VEC:   %[[v10:.+]] = and <2 x i1> %[[v8]], <i1 true, i1 true><br>

 ; VEC:   %[[v11:.+]] = extractelement <2 x i1> %[[v10]], i32 0<br>

 ; VEC:   %[[v12:.+]] = icmp eq i1 %[[v11]], true<br>

-; VEC:   %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0<br>

-; VEC:   %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0<br>

 ; VEC:   br i1 %[[v12]], label %[[cond:.+]], label %[[else:.+]]<br>

 ;<br>

 ; VEC: [[cond]]:<br>

+; VEC:   %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0<br>

+; VEC:   %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0<br>

 ; VEC:   store i32 %[[v13]], i32* %[[v14]], align 4<br>

 ; VEC:   br label %[[else:.+]]<br>

 ;<br>

 ; VEC: [[else]]:<br>

 ; VEC:   %[[v15:.+]] = extractelement <2 x i1> %[[v10]], i32 1<br>

 ; VEC:   %[[v16:.+]] = icmp eq i1 %[[v15]], true<br>

-; VEC:   %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1<br>

-; VEC:   %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1<br>

 ; VEC:   br i1 %[[v16]], label %[[cond2:.+]], label %[[else2:.+]]<br>

 ;<br>

 ; VEC: [[cond2]]:<br>

+; VEC:   %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1<br>

+; VEC:   %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1<br>

 ; VEC:   store i32 %[[v17]], i32* %[[v18]], align 4<br>

 ; VEC:   br label %[[else2:.+]]<br>

 ;<br>

 ; VEC: [[else2]]:<br>

<br>

-; VEC-IC-LABEL: test<br>

-; VEC-IC:   %[[v1:.+]] = icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100><br>

-; VEC-IC:   %[[v2:.+]] = add nsw <2 x i32> %{{.*}}, <i32 20, i32 20><br>

-; VEC-IC:   %[[v3:.+]] = extractelement <2 x i1> %[[v1]], i32 0<br>

-; VEC-IC:   br i1 %[[v3]], label %[[cond:.+]], label %[[else:.+]]<br>

-;<br>

-; VEC-IC: [[cond]]:<br>

-; VEC-IC:   %[[v4:.+]] = extractelement <2 x i32> %[[v2]], i32 0<br>

-; VEC-IC:   store i32 %[[v4]], i32* %{{.*}}, align 4<br>

-; VEC-IC:   br label %[[else:.+]]<br>

-;<br>

-; VEC-IC: [[else]]:<br>

-; VEC-IC:   %[[v5:.+]] = extractelement <2 x i1> %[[v1]], i32 1<br>

-; VEC-IC:   br i1 %[[v5]], label %[[cond2:.+]], label %[[else2:.+]]<br>

-;<br>

-; VEC-IC: [[cond2]]:<br>

-; VEC-IC:   %[[v6:.+]] = extractelement <2 x i32> %[[v2]], i32 1<br>

-; VEC-IC:   store i32 %[[v6]], i32* %{{.*}}, align 4<br>

-; VEC-IC:   br label %[[else2:.+]]<br>

-;<br>

-; VEC-IC: [[else2]]:<br>

-<br>

 ; UNROLL-LABEL: test<br>

 ; UNROLL: vector.body:<br>

 ; UNROLL:   %[[IND:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 0<br>

<br>

<br>

______________________________<wbr>_________________<br>

llvm-commits mailing list<br>

<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>

<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>

</blockquote></div><br></div>