[llvm] r326079 - [LV] Move isLegalMasked* functions from Legality to CostModel
Mikael Holmén via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 25 01:14:48 PDT 2018
Hi Hideki and Renato,
Running

  opt -loop-vectorize -S -o - tr15930.ll

with this commit (tr15930.ll is attached below) gives:
Instruction does not dominate all uses!
%25 = trunc i32 %offset.idx16 to i16
%broadcast.splatinsert17 = insertelement <4 x i16> undef, i16 %25, i32 0
LLVM ERROR: Broken function found, compilation aborted!
Looking at the output after the loop vectorizer, we get:
*** IR Dump After Loop Vectorization ***
define void @f1() {
entry:
br i1 false, label %scalar.ph, label %vector.scevcheck
vector.scevcheck: ; preds = %entry
%mul = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 1, i16 undef)
%mul.result = extractvalue { i16, i1 } %mul, 0
%mul.overflow = extractvalue { i16, i1 } %mul, 1
%0 = add i16 undef, %mul.result
%1 = sub i16 undef, %mul.result
%2 = icmp sgt i16 %1, undef
%3 = icmp slt i16 %0, undef
%4 = select i1 true, i1 %2, i1 %3
%5 = or i1 %4, %mul.overflow
%6 = or i1 false, %5
br i1 %6, label %scalar.ph, label %vector.ph
vector.ph: ; preds = %vector.scevcheck
%broadcast.splatinsert17 = insertelement <4 x i16> undef, i16 %25, i32 0
%broadcast.splat18 = shufflevector <4 x i16> %broadcast.splatinsert17, <4 x i16> undef, <4 x i32> zeroinitializer
br label %vector.body
[...]
pred.load.continue15: ; preds = %pred.load.if14, %pred.load.continue13
%24 = phi i32 [ undef, %pred.load.continue13 ], [ %23, %pred.load.if14 ]
%offset.idx16 = sub i32 undef, %index
%25 = trunc i32 %offset.idx16 to i16
If we follow the path
  entry -> vector.scevcheck -> vector.ph
we see that the def of %25 in pred.load.continue15 (a block inside the
vector loop body) doesn't dominate the use in vector.ph, which precedes
the loop.
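Reduced to its essence, this is the classic SSA dominance violation. A
minimal hand-written sketch (not taken from the reproducer) that the
verifier rejects with the same message:

  define void @sketch(i32 %n) {
  entry:
    br label %ph
  ph:
    ; Invalid: %v is defined in %loop, and %loop does not dominate %ph.
    %bad = add i32 %v, 1
    br label %loop
  loop:
    %v = add i32 %n, 1
    br i1 undef, label %loop, label %exit
  exit:
    ret void
  }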
Regards,
Mikael
On 02/26/2018 12:06 PM, Renato Golin via llvm-commits wrote:
> Author: rengolin
> Date: Mon Feb 26 03:06:36 2018
> New Revision: 326079
>
> URL: http://llvm.org/viewvc/llvm-project?rev=326079&view=rev
> Log:
> [LV] Move isLegalMasked* functions from Legality to CostModel
>
> All SIMD architectures can emulate masked load/store/gather/scatter
> through an element-wise condition check, scalar load/store, and
> insert/extract. Therefore, bailing out of vectorization as a legality
> failure when these functions return false is incorrect. We should proceed
> to the cost model and determine profitability.
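As an illustration of the element-wise emulation described here: for one
lane of a predicated <4 x i16> store, the pattern is roughly the following
(a hand-written sketch, not output of this patch; the block and value
names are made up):

  %m0 = extractelement <4 x i1> %mask, i32 0
  br i1 %m0, label %pred.store.if, label %pred.store.continue
  pred.store.if:
    %e0 = extractelement <4 x i16> %val, i32 0
    store i16 %e0, i16* %ptr0, align 2
    br label %pred.store.continue
  pred.store.continue:
    ; ...and likewise for lanes 1 to 3.

The pred.load.if/pred.load.continue blocks in the dump above are the load
counterpart of the same pattern.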
>
> This patch addresses the vectorizer's architectural limitation
> described above. As such, I tried to keep the cost model and the
> vectorize/don't-vectorize behavior nearly unchanged. Cost model tuning
> should be done separately.
>
> Please see
> http://lists.llvm.org/pipermail/llvm-dev/2018-January/120164.html for
> RFC and the discussions.
>
> Closes D43208.
>
> Patch by: Hideki Saito <hideki.saito at intel.com>
>
> Modified:
> llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> llvm/trunk/test/Transforms/LoopVectorize/conditional-assignment.ll
> llvm/trunk/test/Transforms/LoopVectorize/hoist-loads.ll
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=326079&r1=326078&r2=326079&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Mon Feb 26 03:06:36 2018
> @@ -1648,58 +1648,12 @@ public:
>
> bool hasStride(Value *V) { return LAI->hasStride(V); }
>
> - /// Returns true if the target machine supports masked store operation
> - /// for the given \p DataType and kind of access to \p Ptr.
> - bool isLegalMaskedStore(Type *DataType, Value *Ptr) {
> - return isConsecutivePtr(Ptr) && TTI->isLegalMaskedStore(DataType);
> - }
> -
> - /// Returns true if the target machine supports masked load operation
> - /// for the given \p DataType and kind of access to \p Ptr.
> - bool isLegalMaskedLoad(Type *DataType, Value *Ptr) {
> - return isConsecutivePtr(Ptr) && TTI->isLegalMaskedLoad(DataType);
> - }
> -
> - /// Returns true if the target machine supports masked scatter operation
> - /// for the given \p DataType.
> - bool isLegalMaskedScatter(Type *DataType) {
> - return TTI->isLegalMaskedScatter(DataType);
> - }
> -
> - /// Returns true if the target machine supports masked gather operation
> - /// for the given \p DataType.
> - bool isLegalMaskedGather(Type *DataType) {
> - return TTI->isLegalMaskedGather(DataType);
> - }
> -
> - /// Returns true if the target machine can represent \p V as a masked gather
> - /// or scatter operation.
> - bool isLegalGatherOrScatter(Value *V) {
> - auto *LI = dyn_cast<LoadInst>(V);
> - auto *SI = dyn_cast<StoreInst>(V);
> - if (!LI && !SI)
> - return false;
> - auto *Ptr = getPointerOperand(V);
> - auto *Ty = cast<PointerType>(Ptr->getType())->getElementType();
> - return (LI && isLegalMaskedGather(Ty)) || (SI && isLegalMaskedScatter(Ty));
> - }
> -
> /// Returns true if vector representation of the instruction \p I
> /// requires mask.
> bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
>
> unsigned getNumStores() const { return LAI->getNumStores(); }
> unsigned getNumLoads() const { return LAI->getNumLoads(); }
> - unsigned getNumPredStores() const { return NumPredStores; }
> -
> - /// Returns true if \p I is an instruction that will be scalarized with
> - /// predication. Such instructions include conditional stores and
> - /// instructions that may divide by zero.
> - bool isScalarWithPredication(Instruction *I);
> -
> - /// Returns true if \p I is a memory instruction with consecutive memory
> - /// access that can be widened.
> - bool memoryInstructionCanBeWidened(Instruction *I, unsigned VF = 1);
>
> // Returns true if the NoNaN attribute is set on the function.
> bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
> @@ -1753,8 +1707,6 @@ private:
> return LAI ? &LAI->getSymbolicStrides() : nullptr;
> }
>
> - unsigned NumPredStores = 0;
> -
> /// The loop that we evaluate.
> Loop *TheLoop;
>
> @@ -2060,7 +2012,53 @@ public:
> collectLoopScalars(VF);
> }
>
> + /// Returns true if the target machine supports masked store operation
> + /// for the given \p DataType and kind of access to \p Ptr.
> + bool isLegalMaskedStore(Type *DataType, Value *Ptr) {
> + return Legal->isConsecutivePtr(Ptr) && TTI.isLegalMaskedStore(DataType);
> + }
> +
> + /// Returns true if the target machine supports masked load operation
> + /// for the given \p DataType and kind of access to \p Ptr.
> + bool isLegalMaskedLoad(Type *DataType, Value *Ptr) {
> + return Legal->isConsecutivePtr(Ptr) && TTI.isLegalMaskedLoad(DataType);
> + }
> +
> + /// Returns true if the target machine supports masked scatter operation
> + /// for the given \p DataType.
> + bool isLegalMaskedScatter(Type *DataType) {
> + return TTI.isLegalMaskedScatter(DataType);
> + }
> +
> + /// Returns true if the target machine supports masked gather operation
> + /// for the given \p DataType.
> + bool isLegalMaskedGather(Type *DataType) {
> + return TTI.isLegalMaskedGather(DataType);
> + }
> +
> + /// Returns true if the target machine can represent \p V as a masked gather
> + /// or scatter operation.
> + bool isLegalGatherOrScatter(Value *V) {
> + bool LI = isa<LoadInst>(V);
> + bool SI = isa<StoreInst>(V);
> + if (!LI && !SI)
> + return false;
> + auto *Ty = getMemInstValueType(V);
> + return (LI && isLegalMaskedGather(Ty)) || (SI && isLegalMaskedScatter(Ty));
> + }
> +
> + /// Returns true if \p I is an instruction that will be scalarized with
> + /// predication. Such instructions include conditional stores and
> + /// instructions that may divide by zero.
> + bool isScalarWithPredication(Instruction *I);
> +
> + /// Returns true if \p I is a memory instruction with consecutive memory
> + /// access that can be widened.
> + bool memoryInstructionCanBeWidened(Instruction *I, unsigned VF = 1);
> +
> private:
> + unsigned NumPredStores = 0;
> +
> /// \return An upper bound for the vectorization factor, larger than zero.
> /// One is returned if vectorization should best be avoided due to cost.
> unsigned computeFeasibleMaxVF(bool OptForSize, unsigned ConstTripCount);
> @@ -2112,6 +2110,10 @@ private:
> /// as a vector operation.
> bool isConsecutiveLoadOrStore(Instruction *I);
>
> + /// Returns true if an artificially high cost for emulated masked memrefs
> + /// should be used.
> + bool useEmulatedMaskMemRefHack(Instruction *I);
> +
> /// Create an analysis remark that explains why vectorization failed
> ///
> /// \p RemarkName is the identifier for the remark. \return the remark object
> @@ -5421,14 +5423,22 @@ void LoopVectorizationCostModel::collect
> Scalars[VF].insert(Worklist.begin(), Worklist.end());
> }
>
> -bool LoopVectorizationLegality::isScalarWithPredication(Instruction *I) {
> - if (!blockNeedsPredication(I->getParent()))
> +bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I) {
> + if (!Legal->blockNeedsPredication(I->getParent()))
> return false;
> switch(I->getOpcode()) {
> default:
> break;
> - case Instruction::Store:
> - return !isMaskRequired(I);
> + case Instruction::Load:
> + case Instruction::Store: {
> + if (!Legal->isMaskRequired(I))
> + return false;
> + auto *Ptr = getPointerOperand(I);
> + auto *Ty = getMemInstValueType(I);
> + return isa<LoadInst>(I) ?
> + !(isLegalMaskedLoad(Ty, Ptr) || isLegalMaskedGather(Ty))
> + : !(isLegalMaskedStore(Ty, Ptr) || isLegalMaskedScatter(Ty));
> + }
> case Instruction::UDiv:
> case Instruction::SDiv:
> case Instruction::SRem:
> @@ -5438,8 +5448,8 @@ bool LoopVectorizationLegality::isScalar
> return false;
> }
>
> -bool LoopVectorizationLegality::memoryInstructionCanBeWidened(Instruction *I,
> - unsigned VF) {
> +bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(Instruction *I,
> + unsigned VF) {
> // Get and ensure we have a valid memory instruction.
> LoadInst *LI = dyn_cast<LoadInst>(I);
> StoreInst *SI = dyn_cast<StoreInst>(I);
> @@ -5448,7 +5458,7 @@ bool LoopVectorizationLegality::memoryIn
> auto *Ptr = getPointerOperand(I);
>
> // In order to be widened, the pointer should be consecutive, first of all.
> - if (!isConsecutivePtr(Ptr))
> + if (!Legal->isConsecutivePtr(Ptr))
> return false;
>
> // If the instruction is a store located in a predicated block, it will be
> @@ -5703,39 +5713,26 @@ bool LoopVectorizationLegality::blockCan
> if (!LI)
> return false;
> if (!SafePtrs.count(LI->getPointerOperand())) {
> - if (isLegalMaskedLoad(LI->getType(), LI->getPointerOperand()) ||
> - isLegalMaskedGather(LI->getType())) {
> - MaskedOp.insert(LI);
> - continue;
> - }
> // !llvm.mem.parallel_loop_access implies if-conversion safety.
> - if (IsAnnotatedParallel)
> - continue;
> - return false;
> + // Otherwise, record that the load needs (real or emulated) masking
> + // and let the cost model decide.
> + if (!IsAnnotatedParallel)
> + MaskedOp.insert(LI);
> + continue;
> }
> }
>
> if (I.mayWriteToMemory()) {
> auto *SI = dyn_cast<StoreInst>(&I);
> - // We only support predication of stores in basic blocks with one
> - // predecessor.
> if (!SI)
> return false;
> -
> - // Build a masked store if it is legal for the target.
> - if (isLegalMaskedStore(SI->getValueOperand()->getType(),
> - SI->getPointerOperand()) ||
> - isLegalMaskedScatter(SI->getValueOperand()->getType())) {
> - MaskedOp.insert(SI);
> - continue;
> - }
> -
> - bool isSafePtr = (SafePtrs.count(SI->getPointerOperand()) != 0);
> - bool isSinglePredecessor = SI->getParent()->getSinglePredecessor();
> -
> - if (++NumPredStores > NumberOfStoresToPredicate || !isSafePtr ||
> - !isSinglePredecessor)
> - return false;
> + // Predicated store requires some form of masking:
> + // 1) masked store HW instruction,
> + // 2) emulation via load-blend-store (only if safe and legal to do so,
> + // be aware of the race conditions), or
> + // 3) element-by-element predicate check and scalar store.
> + MaskedOp.insert(SI);
> + continue;
> }
> if (I.mayThrow())
> return false;
> @@ -6050,13 +6047,6 @@ void InterleavedAccessInfo::analyzeInter
> }
>
> Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
> - if (!EnableCondStoresVectorization && Legal->getNumPredStores()) {
> - ORE->emit(createMissedAnalysis("ConditionalStore")
> - << "store that is conditionally executed prevents vectorization");
> - DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n");
> - return None;
> - }
> -
> if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) {
> // TODO: It may be useful to do since it's still likely to be dynamically
> // uniform if the target can skip.
> @@ -6183,9 +6173,7 @@ LoopVectorizationCostModel::computeFeasi
> VectorizationFactor
> LoopVectorizationCostModel::selectVectorizationFactor(unsigned MaxVF) {
> float Cost = expectedCost(1).first;
> -#ifndef NDEBUG
> const float ScalarCost = Cost;
> -#endif /* NDEBUG */
> unsigned Width = 1;
> DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n");
>
> @@ -6216,6 +6204,14 @@ LoopVectorizationCostModel::selectVector
> }
> }
>
> + if (!EnableCondStoresVectorization && NumPredStores) {
> + ORE->emit(createMissedAnalysis("ConditionalStore")
> + << "store that is conditionally executed prevents vectorization");
> + DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n");
> + Width = 1;
> + Cost = ScalarCost;
> + }
> +
> DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs()
> << "LV: Vectorization seems to be not beneficial, "
> << "but was forced by a user.\n");
> @@ -6267,7 +6263,7 @@ LoopVectorizationCostModel::getSmallestA
> // optimization to non-pointer types.
> //
> if (T->isPointerTy() && !isConsecutiveLoadOrStore(&I) &&
> - !Legal->isAccessInterleaved(&I) && !Legal->isLegalGatherOrScatter(&I))
> + !Legal->isAccessInterleaved(&I) && !isLegalGatherOrScatter(&I))
> continue;
>
> MinWidth = std::min(MinWidth,
> @@ -6592,6 +6588,22 @@ LoopVectorizationCostModel::calculateReg
> return RUs;
> }
>
> +bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I){
> + // TODO: Cost model for emulated masked load/store is completely
> + // broken. This hack guides the cost model to use an artificially
> + // high enough value to practically disable vectorization with such
> + // operations, except where previously deployed legality hack allowed
> + // using very low cost values. This is to avoid regressions coming simply
> + // from moving "masked load/store" check from legality to cost model.
> + // Masked Load/Gather emulation was previously never allowed.
> + // Limited number of Masked Store/Scatter emulation was allowed.
> + assert(isScalarWithPredication(I) &&
> + "Expecting a scalar emulated instruction");
> + return isa<LoadInst>(I) ||
> + (isa<StoreInst>(I) &&
> + NumPredStores > NumberOfStoresToPredicate);
> +}
> +
> void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) {
> // If we aren't vectorizing the loop, or if we've already collected the
> // instructions to scalarize, there's nothing to do. Collection may already
> @@ -6612,11 +6624,13 @@ void LoopVectorizationCostModel::collect
> if (!Legal->blockNeedsPredication(BB))
> continue;
> for (Instruction &I : *BB)
> - if (Legal->isScalarWithPredication(&I)) {
> + if (isScalarWithPredication(&I)) {
> ScalarCostsTy ScalarCosts;
> - if (computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
> + // Do not apply discount logic if hacked cost is needed
> + // for emulated masked memrefs.
> + if (!useEmulatedMaskMemRefHack(&I) &&
> + computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
> ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());
> -
> // Remember that BB will remain after vectorization.
> PredicatedBBsAfterVectorization.insert(BB);
> }
> @@ -6651,7 +6665,7 @@ int LoopVectorizationCostModel::computeP
>
> // If the instruction is scalar with predication, it will be analyzed
> // separately. We ignore it within the context of PredInst.
> - if (Legal->isScalarWithPredication(I))
> + if (isScalarWithPredication(I))
> return false;
>
> // If any of the instruction's operands are uniform after vectorization,
> @@ -6705,7 +6719,7 @@ int LoopVectorizationCostModel::computeP
>
> // Compute the scalarization overhead of needed insertelement instructions
> // and phi nodes.
> - if (Legal->isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
> + if (isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
> ScalarCost += TTI.getScalarizationOverhead(ToVectorTy(I->getType(), VF),
> true, false);
> ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI);
> @@ -6848,9 +6862,15 @@ unsigned LoopVectorizationCostModel::get
> // If we have a predicated store, it may not be executed for each vector
> // lane. Scale the cost by the probability of executing the predicated
> // block.
> - if (Legal->isScalarWithPredication(I))
> + if (isScalarWithPredication(I)) {
> Cost /= getReciprocalPredBlockProb();
>
> + if (useEmulatedMaskMemRefHack(I))
> + // Artificially setting to a high enough value to practically disable
> + // vectorization with such operations.
> + Cost = 3000000;
> + }
> +
> return Cost;
> }
>
> @@ -6975,6 +6995,7 @@ LoopVectorizationCostModel::getInstructi
> void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) {
> if (VF == 1)
> return;
> + NumPredStores = 0;
> for (BasicBlock *BB : TheLoop->blocks()) {
> // For each instruction in the old loop.
> for (Instruction &I : *BB) {
> @@ -6982,6 +7003,8 @@ void LoopVectorizationCostModel::setCost
> if (!Ptr)
> continue;
>
> + if (isa<StoreInst>(&I) && isScalarWithPredication(&I))
> + NumPredStores++;
> if (isa<LoadInst>(&I) && Legal->isUniform(Ptr)) {
> // Scalar load + broadcast
> unsigned Cost = getUniformMemOpCost(&I, VF);
> @@ -6990,7 +7013,7 @@ void LoopVectorizationCostModel::setCost
> }
>
> // We assume that widening is the best solution when possible.
> - if (Legal->memoryInstructionCanBeWidened(&I, VF)) {
> + if (memoryInstructionCanBeWidened(&I, VF)) {
> unsigned Cost = getConsecutiveMemOpCost(&I, VF);
> int ConsecutiveStride = Legal->isConsecutivePtr(getPointerOperand(&I));
> assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
> @@ -7017,7 +7040,7 @@ void LoopVectorizationCostModel::setCost
> }
>
> unsigned GatherScatterCost =
> - Legal->isLegalGatherOrScatter(&I)
> + isLegalGatherOrScatter(&I)
> ? getGatherScatterCost(&I, VF) * NumAccesses
> : std::numeric_limits<unsigned>::max();
>
> @@ -7178,7 +7201,7 @@ unsigned LoopVectorizationCostModel::get
> // vector lane. Get the scalarization cost and scale this amount by the
> // probability of executing the predicated block. If the instruction is not
> // predicated, we fall through to the next case.
> - if (VF > 1 && Legal->isScalarWithPredication(I)) {
> + if (VF > 1 && isScalarWithPredication(I)) {
> unsigned Cost = 0;
>
> // These instructions have a non-void type, so account for the phi nodes
> @@ -7799,7 +7822,7 @@ LoopVectorizationPlanner::tryToBlend(Ins
>
> bool LoopVectorizationPlanner::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
> VFRange &Range) {
> - if (Legal->isScalarWithPredication(I))
> + if (CM.isScalarWithPredication(I))
> return false;
>
> auto IsVectorizableOpcode = [](unsigned Opcode) {
> @@ -7906,7 +7929,7 @@ VPBasicBlock *LoopVectorizationPlanner::
> [&](unsigned VF) { return CM.isUniformAfterVectorization(I, VF); },
> Range);
>
> - bool IsPredicated = Legal->isScalarWithPredication(I);
> + bool IsPredicated = CM.isScalarWithPredication(I);
> auto *Recipe = new VPReplicateRecipe(I, IsUniform, IsPredicated);
>
> // Find if I uses a predicated instruction. If so, it will use its scalar
>
> Modified: llvm/trunk/test/Transforms/LoopVectorize/conditional-assignment.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/conditional-assignment.ll?rev=326079&r1=326078&r2=326079&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/conditional-assignment.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/conditional-assignment.ll Mon Feb 26 03:06:36 2018
> @@ -1,7 +1,7 @@
> ; RUN: opt < %s -enable-cond-stores-vec=false -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
> ; RUN: opt < %s -enable-cond-stores-vec=false -passes=loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
>
> -; CHECK: remark: source.c:2:8: loop not vectorized: store that is conditionally executed prevents vectorization
> +; CHECK: remark: source.c:2:8: the cost-model indicates that vectorization is not beneficial
>
> target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
>
>
> Modified: llvm/trunk/test/Transforms/LoopVectorize/hoist-loads.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/hoist-loads.ll?rev=326079&r1=326078&r2=326079&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/hoist-loads.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/hoist-loads.ll Mon Feb 26 03:06:36 2018
> @@ -37,8 +37,9 @@ for.end:
> }
>
> ; However, we can't hoist loads whose address we have not seen unconditionally
> -; accessed.
> +; accessed. One wide load is fine, but not the second.
> ; CHECK-LABEL: @dont_hoist_cond_load(
> +; CHECK: load <2 x float>
> ; CHECK-NOT: load <2 x float>
>
> define void @dont_hoist_cond_load() {
>
>
-------------- next part --------------
target triple = "x86_64-unknown-linux-gnu"
@a = external global [2 x i16], align 1
define void @f1() {
entry:
br label %for.body
for.body: ; preds = %land.end, %entry
%0 = phi i32 [ undef, %entry ], [ %dec, %land.end ]
br i1 undef, label %land.end, label %land.rhs
land.rhs: ; preds = %for.body
%1 = load i32, i32* undef, align 1
br label %land.end
land.end: ; preds = %land.rhs, %for.body
%2 = trunc i32 %0 to i16
%arrayidx = getelementptr inbounds [2 x i16], [2 x i16]* @a, i16 0, i16 %2
store i16 undef, i16* %arrayidx, align 1
%dec = add nsw i32 %0, -1
%cmp = icmp sgt i32 %0, 1
br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
for.cond.for.end_crit_edge: ; preds = %land.end
unreachable
}