[llvm] r279620 - [Loop Vectorizer] Support predication of div/rem
Michael Kuperstein via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 29 12:57:01 PDT 2016
Hi Gil,
Just to make sure you've seen the bug report (bugzilla isn't sending email
right now, because of the spambots) - this caused/exposed PR30172.
Thanks,
Michael
On Wed, Aug 24, 2016 at 4:37 AM, Gil Rapaport via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Author: gilr
> Date: Wed Aug 24 06:37:57 2016
> New Revision: 279620
>
> URL: http://llvm.org/viewvc/llvm-project?rev=279620&view=rev
> Log:
> [Loop Vectorizer] Support predication of div/rem
>
> div/rem instructions in basic blocks that require predication currently
> prevent vectorization. This patch extends the existing mechanism for
> predicating stores to handle other instructions and leverages it to
> predicate divs and rems.
>
> Differential Revision: https://reviews.llvm.org/D22918
>
> Added:
> llvm/trunk/test/Transforms/LoopVectorize/if-pred-non-void.ll
> llvm/trunk/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll
> Modified:
> llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> llvm/trunk/test/Transforms/LoopVectorize/if-pred-stores.ll
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=279620&r1=279619&r2=279620&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Wed Aug 24
> 06:37:57 2016
> @@ -386,8 +386,9 @@ protected:
> /// See PR14725.
> void fixLCSSAPHIs();
>
> - /// Predicate conditional stores on their respective conditions.
> - void predicateStores();
> + /// Predicate conditional instructions that require predication on their
> + /// respective conditions.
> + void predicateInstructions();
>
> /// Shrinks vector element sizes based on information in "MinBWs".
> void truncateToMinimalBitwidths();
> @@ -414,11 +415,11 @@ protected:
> void updateAnalysis();
>
> /// This instruction is un-vectorizable. Implement it as a sequence
> - /// of scalars. If \p IfPredicateStore is true we need to 'hide' each
> + /// of scalars. If \p IfPredicateInstr is true we need to 'hide' each
> /// scalarized instruction behind an if block predicated on the control
> /// dependence of the instruction.
> virtual void scalarizeInstruction(Instruction *Instr,
> - bool IfPredicateStore = false);
> + bool IfPredicateInstr = false);
>
> /// Vectorize Load and Store instructions,
> virtual void vectorizeMemoryInstruction(Instruction *Instr);
> @@ -624,7 +625,7 @@ protected:
>
> /// Store instructions that should be predicated, as a pair
> /// <StoreInst, Predicate>
> - SmallVector<std::pair<StoreInst *, Value *>, 4> PredicatedStores;
> + SmallVector<std::pair<Instruction *, Value *>, 4>
> PredicatedInstructions;
> EdgeMaskCache MaskCache;
> /// Trip count of the original loop.
> Value *TripCount;
> @@ -654,7 +655,7 @@ public:
>
> private:
> void scalarizeInstruction(Instruction *Instr,
> - bool IfPredicateStore = false) override;
> + bool IfPredicateInstr = false) override;
> void vectorizeMemoryInstruction(Instruction *Instr) override;
> Value *getBroadcastInstrs(Value *V) override;
> Value *getStepVector(Value *Val, int StartIdx, Value *Step,
> @@ -2767,8 +2768,11 @@ void InnerLoopVectorizer::vectorizeMemor
> }
>
> void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
> - bool IfPredicateStore) {
> + bool IfPredicateInstr) {
> assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
> + DEBUG(dbgs() << "LV: Scalarizing"
> + << (IfPredicateInstr ? " and predicating:" : ":") << *Instr
> + << '\n');
> // Holds vector parameters or scalars, in case of uniform vals.
> SmallVector<VectorParts, 4> Params;
>
> @@ -2812,7 +2816,7 @@ void InnerLoopVectorizer::scalarizeInstr
> VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
>
> VectorParts Cond;
> - if (IfPredicateStore) {
> + if (IfPredicateInstr) {
> assert(Instr->getParent()->getSinglePredecessor() &&
> "Only support single predecessor blocks");
> Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
> @@ -2826,7 +2830,7 @@ void InnerLoopVectorizer::scalarizeInstr
>
> // Start if-block.
> Value *Cmp = nullptr;
> - if (IfPredicateStore) {
> + if (IfPredicateInstr) {
> Cmp = Builder.CreateExtractElement(Cond[Part],
> Builder.getInt32(Width));
> Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp,
> ConstantInt::get(Cmp->getType(), 1));
> @@ -2865,9 +2869,8 @@ void InnerLoopVectorizer::scalarizeInstr
> VecResults[Part] = Builder.CreateInsertElement(VecResults[Part],
> Cloned,
>
> Builder.getInt32(Width));
> // End if-block.
> - if (IfPredicateStore)
> - PredicatedStores.push_back(
> - std::make_pair(cast<StoreInst>(Cloned), Cmp));
> + if (IfPredicateInstr)
> + PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp));
> }
> }
> }
> @@ -3398,9 +3401,13 @@ static Value *addFastMathFlag(Value *V)
> return V;
> }
>
> -/// Estimate the overhead of scalarizing a value. Insert and Extract are
> set if
> -/// the result needs to be inserted and/or extracted from vectors.
> +/// \brief Estimate the overhead of scalarizing a value based on its type.
> +/// Insert and Extract are set if the result needs to be inserted and/or
> +/// extracted from vectors.
> +/// If the instruction is also to be predicated, add the cost of a PHI
> +/// node to the insertion cost.
> static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool
> Extract,
> + bool Predicated,
> const TargetTransformInfo &TTI) {
> if (Ty->isVoidTy())
> return 0;
> @@ -3409,15 +3416,58 @@ static unsigned getScalarizationOverhead
> unsigned Cost = 0;
>
> for (unsigned I = 0, E = Ty->getVectorNumElements(); I < E; ++I) {
> - if (Insert)
> - Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, I);
> if (Extract)
> Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, Ty, I);
> + if (Insert) {
> + Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, I);
> + if (Predicated)
> + Cost += TTI.getCFInstrCost(Instruction::PHI);
> + }
> }
>
> + // We assume that if-converted blocks have a 50% chance of being
> executed.
> + // Predicated scalarized instructions are avoided due to the CF that
> bypasses
> + // turned off lanes. The extracts and inserts will be sinked/hoisted to
> the
> + // predicated basic-block and are subjected to the same assumption.
> + if (Predicated)
> + Cost /= 2;
> +
> return Cost;
> }
>
> +/// \brief Estimate the overhead of scalarizing an Instruction based on
> the
> +/// types of its operands and return value.
> +static unsigned getScalarizationOverhead(SmallVectorImpl<Type *> &OpTys,
> + Type *RetTy, bool Predicated,
> + const TargetTransformInfo &TTI) {
> + unsigned ScalarizationCost =
> + getScalarizationOverhead(RetTy, true, false, Predicated, TTI);
> +
> + for (Type *Ty : OpTys)
> + ScalarizationCost +=
> + getScalarizationOverhead(Ty, false, true, Predicated, TTI);
> +
> + return ScalarizationCost;
> +}
> +
> +/// \brief Estimate the overhead of scalarizing an instruction. This is a
> +/// convenience wrapper for the type-based getScalarizationOverhead API.
> +static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
> + bool Predicated,
> + const TargetTransformInfo &TTI) {
> + if (VF == 1)
> + return 0;
> +
> + Type *RetTy = ToVectorTy(I->getType(), VF);
> +
> + SmallVector<Type *, 4> OpTys;
> + unsigned OperandsNum = I->getNumOperands();
> + for (unsigned OpInd = 0; OpInd < OperandsNum; ++OpInd)
> + OpTys.push_back(ToVectorTy(I->getOperand(OpInd)->getType(), VF));
> +
> + return getScalarizationOverhead(OpTys, RetTy, Predicated, TTI);
> +}
> +
> // Estimate cost of a call instruction CI if it were vectorized with
> factor VF.
> // Return the cost of the instruction, including scalarization overhead
> if it's
> // needed. The flag NeedToScalarize shows if the call needs to be
> scalarized -
> @@ -3448,10 +3498,7 @@ static unsigned getVectorCallCost(CallIn
>
> // Compute costs of unpacking argument values for the scalar calls and
> // packing the return values to a vector.
> - unsigned ScalarizationCost =
> - getScalarizationOverhead(RetTy, true, false, TTI);
> - for (Type *Ty : Tys)
> - ScalarizationCost += getScalarizationOverhead(Ty, false, true, TTI);
> + unsigned ScalarizationCost = getScalarizationOverhead(Tys, RetTy,
> false, TTI);
>
> unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
>
> @@ -3871,7 +3918,7 @@ void InnerLoopVectorizer::vectorizeLoop(
> // Make sure DomTree is updated.
> updateAnalysis();
>
> - predicateStores();
> + predicateInstructions();
>
> // Remove redundant induction instructions.
> cse(LoopVectorBody);
> @@ -4038,17 +4085,128 @@ void InnerLoopVectorizer::fixLCSSAPHIs()
> LoopMiddleBlock);
> }
> }
> -
> -void InnerLoopVectorizer::predicateStores() {
> - for (auto KV : PredicatedStores) {
> +
> +void InnerLoopVectorizer::predicateInstructions() {
> +
> + // For each instruction I marked for predication on value C, split I
> into its
> + // own basic block to form an if-then construct over C.
> + // Since I may be fed by extractelement and/or be feeding an
> insertelement
> + // generated during scalarization we try to move such instructions into
> the
> + // predicated basic block as well. For the insertelement this also
> means that
> + // the PHI will be created for the resulting vector rather than for the
> + // scalar instruction.
> + // So for some predicated instruction, e.g. the conditional sdiv in:
> + //
> + // for.body:
> + // ...
> + // %add = add nsw i32 %mul, %0
> + // %cmp5 = icmp sgt i32 %2, 7
> + // br i1 %cmp5, label %if.then, label %if.end
> + //
> + // if.then:
> + // %div = sdiv i32 %0, %1
> + // br label %if.end
> + //
> + // if.end:
> + // %x.0 = phi i32 [ %div, %if.then ], [ %add, %for.body ]
> + //
> + // the sdiv at this point is scalarized and if-converted using a select.
> + // The inactive elements in the vector are not used, but the predicated
> + // instruction is still executed for all vector elements, essentially:
> + //
> + // vector.body:
> + // ...
> + // %17 = add nsw <2 x i32> %16, %wide.load
> + // %29 = extractelement <2 x i32> %wide.load, i32 0
> + // %30 = extractelement <2 x i32> %wide.load51, i32 0
> + // %31 = sdiv i32 %29, %30
> + // %32 = insertelement <2 x i32> undef, i32 %31, i32 0
> + // %35 = extractelement <2 x i32> %wide.load, i32 1
> + // %36 = extractelement <2 x i32> %wide.load51, i32 1
> + // %37 = sdiv i32 %35, %36
> + // %38 = insertelement <2 x i32> %32, i32 %37, i32 1
> + // %predphi = select <2 x i1> %26, <2 x i32> %38, <2 x i32> %17
> + //
> + // Predication will now re-introduce the original control flow to avoid
> false
> + // side-effects by the sdiv instructions on the inactive elements,
> yielding
> + // (after cleanup):
> + //
> + // vector.body:
> + // ...
> + // %5 = add nsw <2 x i32> %4, %wide.load
> + // %8 = icmp sgt <2 x i32> %wide.load52, <i32 7, i32 7>
> + // %9 = extractelement <2 x i1> %8, i32 0
> + // br i1 %9, label %pred.sdiv.if, label %pred.sdiv.continue
> + //
> + // pred.sdiv.if:
> + // %10 = extractelement <2 x i32> %wide.load, i32 0
> + // %11 = extractelement <2 x i32> %wide.load51, i32 0
> + // %12 = sdiv i32 %10, %11
> + // %13 = insertelement <2 x i32> undef, i32 %12, i32 0
> + // br label %pred.sdiv.continue
> + //
> + // pred.sdiv.continue:
> + // %14 = phi <2 x i32> [ undef, %vector.body ], [ %13, %pred.sdiv.if ]
> + // %15 = extractelement <2 x i1> %8, i32 1
> + // br i1 %15, label %pred.sdiv.if54, label %pred.sdiv.continue55
> + //
> + // pred.sdiv.if54:
> + // %16 = extractelement <2 x i32> %wide.load, i32 1
> + // %17 = extractelement <2 x i32> %wide.load51, i32 1
> + // %18 = sdiv i32 %16, %17
> + // %19 = insertelement <2 x i32> %14, i32 %18, i32 1
> + // br label %pred.sdiv.continue55
> + //
> + // pred.sdiv.continue55:
> + // %20 = phi <2 x i32> [ %14, %pred.sdiv.continue ], [ %19,
> %pred.sdiv.if54 ]
> + // %predphi = select <2 x i1> %8, <2 x i32> %20, <2 x i32> %5
> +
> + for (auto KV : PredicatedInstructions) {
> BasicBlock::iterator I(KV.first);
> - auto *BB = SplitBlock(I->getParent(), &*std::next(I), DT, LI);
> + BasicBlock *Head = I->getParent();
> + auto *BB = SplitBlock(Head, &*std::next(I), DT, LI);
> auto *T = SplitBlockAndInsertIfThen(KV.second, &*I,
> /*Unreachable=*/false,
> /*BranchWeights=*/nullptr, DT,
> LI);
> I->moveBefore(T);
> - I->getParent()->setName("pred.store.if");
> - BB->setName("pred.store.continue");
> + // Try to move any extractelement we may have created for the
> predicated
> + // instruction into the Then block.
> + for (Use &Op : I->operands()) {
> + auto *OpInst = dyn_cast<ExtractElementInst>(&*Op);
> + if (OpInst && OpInst->hasOneUse()) // TODO: more accurately -
> hasOneUser()
> + OpInst->moveBefore(&*I);
> + }
> +
> + I->getParent()->setName(Twine("pred.") + I->getOpcodeName() + ".if");
> + BB->setName(Twine("pred.") + I->getOpcodeName() + ".continue");
> +
> + // If the instruction is non-void create a Phi node at reconvergence
> point.
> + if (!I->getType()->isVoidTy()) {
> + Value *IncomingTrue = nullptr;
> + Value *IncomingFalse = nullptr;
> +
> + if (I->hasOneUse() && isa<InsertElementInst>(*I->user_begin())) {
> + // If the predicated instruction is feeding an insert-element,
> move it
> + // into the Then block; Phi node will be created for the vector.
> + InsertElementInst *IEI = cast<InsertElementInst>(*I->
> user_begin());
> + IEI->moveBefore(T);
> + IncomingTrue = IEI; // the new vector with the inserted element.
> + IncomingFalse = IEI->getOperand(0); // the unmodified vector
> + } else {
> + // Phi node will be created for the scalar predicated instruction.
> + IncomingTrue = &*I;
> + IncomingFalse = UndefValue::get(I->getType());
> + }
> +
> + BasicBlock *PostDom = I->getParent()->getSingleSuccessor();
> + assert(PostDom && "Then block has multiple successors");
> + PHINode *Phi =
> + PHINode::Create(IncomingTrue->getType(), 2, "",
> &PostDom->front());
> + IncomingTrue->replaceAllUsesWith(Phi);
> + Phi->addIncoming(IncomingFalse, Head);
> + Phi->addIncoming(IncomingTrue, I->getParent());
> + }
> }
> +
> DEBUG(DT->verifyDomTree());
> }
>
> @@ -4235,6 +4393,24 @@ void InnerLoopVectorizer::widenPHIInstru
> }
> }
>
> +/// A helper function for checking whether an integer division-related
> +/// instruction may divide by zero (in which case it must be predicated if
> +/// executed conditionally in the scalar code).
> +/// TODO: It may be worthwhile to generalize and check isKnownNonZero().
> +/// Non-zero divisors that are non compile-time constants will not be
> +/// converted into multiplication, so we will still end up scalarizing
> +/// the division, but can do so w/o predication.
> +static bool mayDivideByZero(Instruction &I) {
> + assert((I.getOpcode() == Instruction::UDiv ||
> + I.getOpcode() == Instruction::SDiv ||
> + I.getOpcode() == Instruction::URem ||
> + I.getOpcode() == Instruction::SRem) &&
> + "Unexpected instruction");
> + Value *Divisor = I.getOperand(1);
> + auto *CInt = dyn_cast<ConstantInt>(Divisor);
> + return !CInt || CInt->isZero();
> +}
> +
> void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector
> *PV) {
> // For each instruction in the old loop.
> for (Instruction &I : *BB) {
> @@ -4251,17 +4427,23 @@ void InnerLoopVectorizer::vectorizeBlock
> continue;
> } // End of PHI.
>
> + case Instruction::UDiv:
> + case Instruction::SDiv:
> + case Instruction::SRem:
> + case Instruction::URem:
> + // Scalarize with predication if this instruction may divide by
> zero and
> + // block execution is conditional, otherwise fallthrough.
> + if (mayDivideByZero(I) && Legal->blockNeedsPredication(I.getParent()))
> {
> + scalarizeInstruction(&I, true);
> + continue;
> + }
> case Instruction::Add:
> case Instruction::FAdd:
> case Instruction::Sub:
> case Instruction::FSub:
> case Instruction::Mul:
> case Instruction::FMul:
> - case Instruction::UDiv:
> - case Instruction::SDiv:
> case Instruction::FDiv:
> - case Instruction::URem:
> - case Instruction::SRem:
> case Instruction::FRem:
> case Instruction::Shl:
> case Instruction::LShr:
> @@ -5155,17 +5337,6 @@ bool LoopVectorizationLegality::blockCan
> }
> if (I.mayThrow())
> return false;
> -
> - // The instructions below can trap.
> - switch (I.getOpcode()) {
> - default:
> - continue;
> - case Instruction::UDiv:
> - case Instruction::SDiv:
> - case Instruction::URem:
> - case Instruction::SRem:
> - return false;
> - }
> }
>
> return true;
> @@ -6082,17 +6253,24 @@ unsigned LoopVectorizationCostModel::get
> // TODO: IF-converted IFs become selects.
> return 0;
> }
> + case Instruction::UDiv:
> + case Instruction::SDiv:
> + case Instruction::URem:
> + case Instruction::SRem:
> + // We assume that if-converted blocks have a 50% chance of being
> executed.
> + // Predicated scalarized instructions are avoided due to the CF that
> + // bypasses turned off lanes. If we are not predicating, fallthrough.
> + if (VF > 1 && mayDivideByZero(*I) &&
> + Legal->blockNeedsPredication(I->getParent()))
> + return VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy) / 2 +
> + getScalarizationOverhead(I, VF, true, TTI);
> case Instruction::Add:
> case Instruction::FAdd:
> case Instruction::Sub:
> case Instruction::FSub:
> case Instruction::Mul:
> case Instruction::FMul:
> - case Instruction::UDiv:
> - case Instruction::SDiv:
> case Instruction::FDiv:
> - case Instruction::URem:
> - case Instruction::SRem:
> case Instruction::FRem:
> case Instruction::Shl:
> case Instruction::LShr:
> @@ -6328,28 +6506,11 @@ unsigned LoopVectorizationCostModel::get
> return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI));
> return CallCost;
> }
> - default: {
> - // We are scalarizing the instruction. Return the cost of the scalar
> - // instruction, plus the cost of insert and extract into vector
> - // elements, times the vector width.
> - unsigned Cost = 0;
> -
> - if (!RetTy->isVoidTy() && VF != 1) {
> - unsigned InsCost =
> - TTI.getVectorInstrCost(Instruction::InsertElement, VectorTy);
> - unsigned ExtCost =
> - TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy);
> -
> - // The cost of inserting the results plus extracting each one of the
> - // operands.
> - Cost += VF * (InsCost + ExtCost * I->getNumOperands());
> - }
> -
> + default:
> // The cost of executing VF copies of the scalar instruction. This
> opcode
> // is unknown. Assume that it is the same as 'mul'.
> - Cost += VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy);
> - return Cost;
> - }
> + return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) +
> + getScalarizationOverhead(I, VF, false, TTI);
> } // end of switch.
> }
>
> @@ -6407,7 +6568,7 @@ void LoopVectorizationCostModel::collect
> }
>
> void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
> - bool IfPredicateStore) {
> + bool IfPredicateInstr) {
> assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
> // Holds vector parameters or scalars, in case of uniform vals.
> SmallVector<VectorParts, 4> Params;
> @@ -6450,7 +6611,7 @@ void InnerLoopUnroller::scalarizeInstruc
> VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
>
> VectorParts Cond;
> - if (IfPredicateStore) {
> + if (IfPredicateInstr) {
> assert(Instr->getParent()->getSinglePredecessor() &&
> "Only support single predecessor blocks");
> Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
> @@ -6463,7 +6624,7 @@ void InnerLoopUnroller::scalarizeInstruc
>
> // Start an "if (pred) a[i] = ..." block.
> Value *Cmp = nullptr;
> - if (IfPredicateStore) {
> + if (IfPredicateInstr) {
> if (Cond[Part]->getType()->isVectorTy())
> Cond[Part] =
> Builder.CreateExtractElement(Cond[Part],
> Builder.getInt32(0));
> @@ -6494,16 +6655,16 @@ void InnerLoopUnroller::scalarizeInstruc
> VecResults[Part] = Cloned;
>
> // End if-block.
> - if (IfPredicateStore)
> - PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned),
> Cmp));
> + if (IfPredicateInstr)
> + PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp));
> }
> }
>
> void InnerLoopUnroller::vectorizeMemoryInstruction(Instruction *Instr) {
> auto *SI = dyn_cast<StoreInst>(Instr);
> - bool IfPredicateStore = (SI && Legal->blockNeedsPredication(
> SI->getParent()));
> + bool IfPredicateInstr = (SI && Legal->blockNeedsPredication(
> SI->getParent()));
>
> - return scalarizeInstruction(Instr, IfPredicateStore);
> + return scalarizeInstruction(Instr, IfPredicateInstr);
> }
>
> Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; }
>
> Added: llvm/trunk/test/Transforms/LoopVectorize/if-pred-non-void.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/if-pred-non-void.ll?rev=279620&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/Transforms/LoopVectorize/if-pred-non-void.ll (added)
> +++ llvm/trunk/test/Transforms/LoopVectorize/if-pred-non-void.ll Wed Aug
> 24 06:37:57 2016
> @@ -0,0 +1,173 @@
> +; RUN: opt -S -force-vector-width=2 -force-vector-interleave=1
> -loop-vectorize -verify-loop-info -simplifycfg < %s | FileCheck %s
> +
> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> +target triple = "x86_64-unknown-linux-gnu"
> +
> +; Test predication of non-void instructions, specifically (i) that these
> +; instructions permit vectorization and (ii) the creation of an
> insertelement
> +; and a Phi node. We check the full 2-element sequence for the first
> +; instruction; For the rest we'll just make sure they get predicated based
> +; on the code generated for the first element.
> +define void @test(i32* nocapture %asd, i32* nocapture %aud,
> + i32* nocapture %asr, i32* nocapture %aur) {
> +entry:
> + br label %for.body
> +
> +for.cond.cleanup: ; preds = %if.end
> + ret void
> +
> +; CHECK-LABEL: test
> +; CHECK: vector.body:
> +; CHECK: %[[SDEE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
> +; CHECK: %[[SDCC:[a-zA-Z0-9]+]] = icmp eq i1 %[[SDEE]], true
> +; CHECK: br i1 %[[SDCC]], label %[[CSD:[a-zA-Z0-9.]+]], label
> %[[ESD:[a-zA-Z0-9.]+]]
> +; CHECK: [[CSD]]:
> +; CHECK: %[[SDA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32
> 0
> +; CHECK: %[[SDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32
> 0
> +; CHECK: %[[SD0:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0]], %[[SDA1]]
> +; CHECK: %[[SD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32
> %[[SD0]], i32 0
> +; CHECK: br label %[[ESD]]
> +; CHECK: [[ESD]]:
> +; CHECK: %[[SDR:[a-zA-Z0-9]+]] = phi <2 x i32> [ undef, %vector.body ],
> [ %[[SD1]], %[[CSD]] ]
> +; CHECK: %[[SDEEH:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32
> 1
> +; CHECK: %[[SDCCH:[a-zA-Z0-9]+]] = icmp eq i1 %[[SDEEH]], true
> +; CHECK: br i1 %[[SDCCH]], label %[[CSDH:[a-zA-Z0-9.]+]], label
> %[[ESDH:[a-zA-Z0-9.]+]]
> +; CHECK: [[CSDH]]:
> +; CHECK: %[[SDA0H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}},
> i32 1
> +; CHECK: %[[SDA1H:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}},
> i32 1
> +; CHECK: %[[SD0H:[a-zA-Z0-9]+]] = sdiv i32 %[[SDA0H]], %[[SDA1H]]
> +; CHECK: %[[SD1H:[a-zA-Z0-9]+]] = insertelement <2 x i32> %[[SDR]], i32
> %[[SD0H]], i32 1
> +; CHECK: br label %[[ESDH]]
> +; CHECK: [[ESDH]]:
> +; CHECK: %{{.*}} = phi <2 x i32> [ %[[SDR]], %[[ESD]] ], [ %[[SD1H]],
> %[[CSDH]] ]
> +
> +; CHECK: %[[UDEE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
> +; CHECK: %[[UDCC:[a-zA-Z0-9]+]] = icmp eq i1 %[[UDEE]], true
> +; CHECK: br i1 %[[UDCC]], label %[[CUD:[a-zA-Z0-9.]+]], label
> %[[EUD:[a-zA-Z0-9.]+]]
> +; CHECK: [[CUD]]:
> +; CHECK: %[[UDA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32
> 0
> +; CHECK: %[[UDA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32
> 0
> +; CHECK: %[[UD0:[a-zA-Z0-9]+]] = udiv i32 %[[UDA0]], %[[UDA1]]
> +; CHECK: %[[UD1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32
> %[[UD0]], i32 0
> +; CHECK: br label %[[EUD]]
> +; CHECK: [[EUD]]:
> +; CHECK: %{{.*}} = phi <2 x i32> [ undef, %{{.*}} ], [ %[[UD1]],
> %[[CUD]] ]
> +
> +; CHECK: %[[SREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
> +; CHECK: %[[SRCC:[a-zA-Z0-9]+]] = icmp eq i1 %[[SREE]], true
> +; CHECK: br i1 %[[SRCC]], label %[[CSR:[a-zA-Z0-9.]+]], label
> %[[ESR:[a-zA-Z0-9.]+]]
> +; CHECK: [[CSR]]:
> +; CHECK: %[[SRA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32
> 0
> +; CHECK: %[[SRA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32
> 0
> +; CHECK: %[[SR0:[a-zA-Z0-9]+]] = srem i32 %[[SRA0]], %[[SRA1]]
> +; CHECK: %[[SR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32
> %[[SR0]], i32 0
> +; CHECK: br label %[[ESR]]
> +; CHECK: [[ESR]]:
> +; CHECK: %{{.*}} = phi <2 x i32> [ undef, %{{.*}} ], [ %[[SR1]],
> %[[CSR]] ]
> +
> +; CHECK: %[[UREE:[a-zA-Z0-9]+]] = extractelement <2 x i1> %{{.*}}, i32 0
> +; CHECK: %[[URCC:[a-zA-Z0-9]+]] = icmp eq i1 %[[UREE]], true
> +; CHECK: br i1 %[[URCC]], label %[[CUR:[a-zA-Z0-9.]+]], label
> %[[EUR:[a-zA-Z0-9.]+]]
> +; CHECK: [[CUR]]:
> +; CHECK: %[[URA0:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32
> 0
> +; CHECK: %[[URA1:[a-zA-Z0-9]+]] = extractelement <2 x i32> %{{.*}}, i32
> 0
> +; CHECK: %[[UR0:[a-zA-Z0-9]+]] = urem i32 %[[URA0]], %[[URA1]]
> +; CHECK: %[[UR1:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32
> %[[UR0]], i32 0
> +; CHECK: br label %[[EUR]]
> +; CHECK: [[EUR]]:
> +; CHECK: %{{.*}} = phi <2 x i32> [ undef, %{{.*}} ], [ %[[UR1]],
> %[[CUR]] ]
> +
> +for.body: ; preds = %if.end,
> %entry
> + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
> + %isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv
> + %iud = getelementptr inbounds i32, i32* %aud, i64 %indvars.iv
> + %isr = getelementptr inbounds i32, i32* %asr, i64 %indvars.iv
> + %iur = getelementptr inbounds i32, i32* %aur, i64 %indvars.iv
> + %lsd = load i32, i32* %isd, align 4
> + %lud = load i32, i32* %iud, align 4
> + %lsr = load i32, i32* %isr, align 4
> + %lur = load i32, i32* %iur, align 4
> + %psd = add nsw i32 %lsd, 23
> + %pud = add nsw i32 %lud, 24
> + %psr = add nsw i32 %lsr, 25
> + %pur = add nsw i32 %lur, 26
> + %cmp1 = icmp slt i32 %lsd, 100
> + br i1 %cmp1, label %if.then, label %if.end
> +
> +if.then: ; preds = %for.body
> + %rsd = sdiv i32 %psd, %lsd
> + %rud = udiv i32 %pud, %lud
> + %rsr = srem i32 %psr, %lsr
> + %rur = urem i32 %pur, %lur
> + br label %if.end
> +
> +if.end: ; preds = %if.then,
> %for.body
> + %ysd.0 = phi i32 [ %rsd, %if.then ], [ %psd, %for.body ]
> + %yud.0 = phi i32 [ %rud, %if.then ], [ %pud, %for.body ]
> + %ysr.0 = phi i32 [ %rsr, %if.then ], [ %psr, %for.body ]
> + %yur.0 = phi i32 [ %rur, %if.then ], [ %pur, %for.body ]
> + store i32 %ysd.0, i32* %isd, align 4
> + store i32 %yud.0, i32* %iud, align 4
> + store i32 %ysr.0, i32* %isr, align 4
> + store i32 %yur.0, i32* %iur, align 4
> + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> + %exitcond = icmp eq i64 %indvars.iv.next, 128
> + br i1 %exitcond, label %for.cond.cleanup, label %for.body
> +}
> +
> +; Future-use test for predication under smarter scalar-scalar: this test
> will
> +; fail when the vectorizer starts feeding scalarized values directly to
> their
> +; scalar users, i.e. w/o generating redundant insertelement/extractelement
> +; instructions. This case is already supported by the predication code
> (which
> +; should generate a phi for the scalar predicated value rather than for
> the
> +; insertelement), but cannot be tested yet.
> +; If you got this test to fail, kindly fix the test by using the
> alternative
> +; FFU sequence. This will make the test check how we handle this case from
> +; now on.
> +define void @test_scalar2scalar(i32* nocapture %asd, i32* nocapture %bsd)
> {
> +entry:
> + br label %for.body
> +
> +for.cond.cleanup: ; preds = %if.end
> + ret void
> +
> +; CHECK-LABEL: test_scalar2scalar
> +; CHECK: vector.body:
> +; CHECK: br i1 %{{.*}}, label %[[THEN:[a-zA-Z0-9.]+]], label
> %[[FI:[a-zA-Z0-9.]+]]
> +; CHECK: [[THEN]]:
> +; CHECK: %[[PD:[a-zA-Z0-9]+]] = sdiv i32 %{{.*}}, %{{.*}}
> +; CHECK: %[[PDV:[a-zA-Z0-9]+]] = insertelement <2 x i32> undef, i32
> %[[PD]], i32 0
> +; CHECK: br label %[[FI]]
> +; CHECK: [[FI]]:
> +; CHECK: %[[PH:[a-zA-Z0-9]+]] = phi <2 x i32> [ undef, %vector.body ],
> [ %[[PDV]], %[[THEN]] ]
> +; FFU-LABEL: test_scalar2scalar
> +; FFU: vector.body:
> +; FFU: br i1 %{{.*}}, label %[[THEN:[a-zA-Z0-9.]+]], label
> %[[FI:[a-zA-Z0-9.]+]]
> +; FFU: [[THEN]]:
> +; FFU: %[[PD:[a-zA-Z0-9]+]] = sdiv i32 %{{.*}}, %{{.*}}
> +; FFU: br label %[[FI]]
> +; FFU: [[FI]]:
> +; FFU: %{{.*}} = phi i32 [ undef, %vector.body ], [ %[[PD]],
> %[[THEN]] ]
> +
> +for.body: ; preds = %if.end,
> %entry
> + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
> + %isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv
> + %lsd = load i32, i32* %isd, align 4
> + %isd.b = getelementptr inbounds i32, i32* %bsd, i64 %indvars.iv
> + %lsd.b = load i32, i32* %isd.b, align 4
> + %psd = add nsw i32 %lsd, 23
> + %cmp1 = icmp slt i32 %lsd, 100
> + br i1 %cmp1, label %if.then, label %if.end
> +
> +if.then: ; preds = %for.body
> + %sd1 = sdiv i32 %psd, %lsd
> + %rsd = sdiv i32 %lsd.b, %sd1
> + br label %if.end
> +
> +if.end: ; preds = %if.then,
> %for.body
> + %ysd.0 = phi i32 [ %rsd, %if.then ], [ %psd, %for.body ]
> + store i32 %ysd.0, i32* %isd, align 4
> + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> + %exitcond = icmp eq i64 %indvars.iv.next, 128
> + br i1 %exitcond, label %for.cond.cleanup, label %for.body
> +}
>
> Added: llvm/trunk/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll?rev=279620&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll
> (added)
> +++ llvm/trunk/test/Transforms/LoopVectorize/if-pred-not-when-safe.ll Wed
> Aug 24 06:37:57 2016
> @@ -0,0 +1,90 @@
> +; RUN: opt -S -force-vector-width=2 -force-vector-interleave=1
> -loop-vectorize -verify-loop-info -simplifycfg < %s | FileCheck %s
> +
> +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
> +target triple = "x86_64-unknown-linux-gnu"
> +
> +; Test no-predication of instructions that are provably safe, e.g.
> dividing by
> +; a non-zero constant.
> +define void @test(i32* nocapture %asd, i32* nocapture %aud,
> + i32* nocapture %asr, i32* nocapture %aur,
> + i32* nocapture %asd0, i32* nocapture %aud0,
> + i32* nocapture %asr0, i32* nocapture %aur0
> +) {
> +entry:
> + br label %for.body
> +
> +for.cond.cleanup: ; preds = %if.end
> + ret void
> +
> +; CHECK-LABEL: test
> +; CHECK: vector.body:
> +; CHECK: %{{.*}} = sdiv <2 x i32> %{{.*}}, <i32 11, i32 11>
> +; CHECK: %{{.*}} = udiv <2 x i32> %{{.*}}, <i32 13, i32 13>
> +; CHECK: %{{.*}} = srem <2 x i32> %{{.*}}, <i32 17, i32 17>
> +; CHECK: %{{.*}} = urem <2 x i32> %{{.*}}, <i32 19, i32 19>
> +; CHECK-NOT: %{{.*}} = sdiv <2 x i32> %{{.*}}, <i32 0, i32 0>
> +; CHECK-NOT: %{{.*}} = udiv <2 x i32> %{{.*}}, <i32 0, i32 0>
> +; CHECK-NOT: %{{.*}} = srem <2 x i32> %{{.*}}, <i32 0, i32 0>
> +; CHECK-NOT: %{{.*}} = urem <2 x i32> %{{.*}}, <i32 0, i32 0>
> +
> +for.body: ; preds = %if.end,
> %entry
> + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end ]
> + %isd = getelementptr inbounds i32, i32* %asd, i64 %indvars.iv
> + %iud = getelementptr inbounds i32, i32* %aud, i64 %indvars.iv
> + %isr = getelementptr inbounds i32, i32* %asr, i64 %indvars.iv
> + %iur = getelementptr inbounds i32, i32* %aur, i64 %indvars.iv
> + %lsd = load i32, i32* %isd, align 4
> + %lud = load i32, i32* %iud, align 4
> + %lsr = load i32, i32* %isr, align 4
> + %lur = load i32, i32* %iur, align 4
> + %psd = add nsw i32 %lsd, 23
> + %pud = add nsw i32 %lud, 24
> + %psr = add nsw i32 %lsr, 25
> + %pur = add nsw i32 %lur, 26
> + %isd0 = getelementptr inbounds i32, i32* %asd0, i64 %indvars.iv
> + %iud0 = getelementptr inbounds i32, i32* %aud0, i64 %indvars.iv
> + %isr0 = getelementptr inbounds i32, i32* %asr0, i64 %indvars.iv
> + %iur0 = getelementptr inbounds i32, i32* %aur0, i64 %indvars.iv
> + %lsd0 = load i32, i32* %isd0, align 4
> + %lud0 = load i32, i32* %iud0, align 4
> + %lsr0 = load i32, i32* %isr0, align 4
> + %lur0 = load i32, i32* %iur0, align 4
> + %psd0 = add nsw i32 %lsd, 27
> + %pud0 = add nsw i32 %lud, 28
> + %psr0 = add nsw i32 %lsr, 29
> + %pur0 = add nsw i32 %lur, 30
> + %cmp1 = icmp slt i32 %lsd, 100
> + br i1 %cmp1, label %if.then, label %if.end
> +
> +if.then: ; preds = %for.body
> + %rsd = sdiv i32 %psd, 11
> + %rud = udiv i32 %pud, 13
> + %rsr = srem i32 %psr, 17
> + %rur = urem i32 %pur, 19
> + %rsd0 = sdiv i32 %psd0, 0
> + %rud0 = udiv i32 %pud0, 0
> + %rsr0 = srem i32 %psr0, 0
> + %rur0 = urem i32 %pur0, 0
> + br label %if.end
> +
> +if.end: ; preds = %if.then, %for.body
> + %ysd.0 = phi i32 [ %rsd, %if.then ], [ %psd, %for.body ]
> + %yud.0 = phi i32 [ %rud, %if.then ], [ %pud, %for.body ]
> + %ysr.0 = phi i32 [ %rsr, %if.then ], [ %psr, %for.body ]
> + %yur.0 = phi i32 [ %rur, %if.then ], [ %pur, %for.body ]
> + %ysd0.0 = phi i32 [ %rsd0, %if.then ], [ %psd0, %for.body ]
> + %yud0.0 = phi i32 [ %rud0, %if.then ], [ %pud0, %for.body ]
> + %ysr0.0 = phi i32 [ %rsr0, %if.then ], [ %psr0, %for.body ]
> + %yur0.0 = phi i32 [ %rur0, %if.then ], [ %pur0, %for.body ]
> + store i32 %ysd.0, i32* %isd, align 4
> + store i32 %yud.0, i32* %iud, align 4
> + store i32 %ysr.0, i32* %isr, align 4
> + store i32 %yur.0, i32* %iur, align 4
> + store i32 %ysd0.0, i32* %isd0, align 4
> + store i32 %yud0.0, i32* %iud0, align 4
> + store i32 %ysr0.0, i32* %isr0, align 4
> + store i32 %yur0.0, i32* %iur0, align 4
> + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
> + %exitcond = icmp eq i64 %indvars.iv.next, 128
> + br i1 %exitcond, label %for.cond.cleanup, label %for.body
> +}
>
> Modified: llvm/trunk/test/Transforms/LoopVectorize/if-pred-stores.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/if-pred-stores.ll?rev=279620&r1=279619&r2=279620&view=diff
> ============================================================
> ==================
> --- llvm/trunk/test/Transforms/LoopVectorize/if-pred-stores.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/if-pred-stores.ll Wed Aug 24 06:37:57 2016
> @@ -1,7 +1,6 @@
> ; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -verify-loop-info -simplifycfg < %s | FileCheck %s --check-prefix=UNROLL
> ; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -verify-loop-info < %s | FileCheck %s --check-prefix=UNROLL-NOSIMPLIFY
> ; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -verify-loop-info -simplifycfg < %s | FileCheck %s --check-prefix=VEC
> -; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -verify-loop-info -simplifycfg -instcombine < %s | FileCheck %s --check-prefix=VEC-IC
>
> target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
> target triple = "x86_64-apple-macosx10.9.0"
> @@ -17,49 +16,27 @@ entry:
> ; VEC: %[[v10:.+]] = and <2 x i1> %[[v8]], <i1 true, i1 true>
> ; VEC: %[[v11:.+]] = extractelement <2 x i1> %[[v10]], i32 0
> ; VEC: %[[v12:.+]] = icmp eq i1 %[[v11]], true
> -; VEC: %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0
> -; VEC: %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0
> ; VEC: br i1 %[[v12]], label %[[cond:.+]], label %[[else:.+]]
> ;
> ; VEC: [[cond]]:
> +; VEC: %[[v13:.+]] = extractelement <2 x i32> %[[v9]], i32 0
> +; VEC: %[[v14:.+]] = extractelement <2 x i32*> %{{.*}}, i32 0
> ; VEC: store i32 %[[v13]], i32* %[[v14]], align 4
> ; VEC: br label %[[else:.+]]
> ;
> ; VEC: [[else]]:
> ; VEC: %[[v15:.+]] = extractelement <2 x i1> %[[v10]], i32 1
> ; VEC: %[[v16:.+]] = icmp eq i1 %[[v15]], true
> -; VEC: %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1
> -; VEC: %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1
> ; VEC: br i1 %[[v16]], label %[[cond2:.+]], label %[[else2:.+]]
> ;
> ; VEC: [[cond2]]:
> +; VEC: %[[v17:.+]] = extractelement <2 x i32> %[[v9]], i32 1
> +; VEC: %[[v18:.+]] = extractelement <2 x i32*> %{{.+}} i32 1
> ; VEC: store i32 %[[v17]], i32* %[[v18]], align 4
> ; VEC: br label %[[else2:.+]]
> ;
> ; VEC: [[else2]]:
>
> -; VEC-IC-LABEL: test
> -; VEC-IC: %[[v1:.+]] = icmp sgt <2 x i32> %{{.*}}, <i32 100, i32 100>
> -; VEC-IC: %[[v2:.+]] = add nsw <2 x i32> %{{.*}}, <i32 20, i32 20>
> -; VEC-IC: %[[v3:.+]] = extractelement <2 x i1> %[[v1]], i32 0
> -; VEC-IC: br i1 %[[v3]], label %[[cond:.+]], label %[[else:.+]]
> -;
> -; VEC-IC: [[cond]]:
> -; VEC-IC: %[[v4:.+]] = extractelement <2 x i32> %[[v2]], i32 0
> -; VEC-IC: store i32 %[[v4]], i32* %{{.*}}, align 4
> -; VEC-IC: br label %[[else:.+]]
> -;
> -; VEC-IC: [[else]]:
> -; VEC-IC: %[[v5:.+]] = extractelement <2 x i1> %[[v1]], i32 1
> -; VEC-IC: br i1 %[[v5]], label %[[cond2:.+]], label %[[else2:.+]]
> -;
> -; VEC-IC: [[cond2]]:
> -; VEC-IC: %[[v6:.+]] = extractelement <2 x i32> %[[v2]], i32 1
> -; VEC-IC: store i32 %[[v6]], i32* %{{.*}}, align 4
> -; VEC-IC: br label %[[else2:.+]]
> -;
> -; VEC-IC: [[else2]]:
> -
> ; UNROLL-LABEL: test
> ; UNROLL: vector.body:
> ; UNROLL: %[[IND:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 0
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160829/a5886c47/attachment.html>
More information about the llvm-commits mailing list