[llvm] r258620 - [LIR] Add support for structs and hand unrolled loops

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 26 21:35:58 PST 2016


Thank you, Quentin and David.

I fixed it and recommitted in r258777.

Best,

Haicheng

> Hi Haicheng,
>
> David suggested that this commit may cause:
> https://llvm.org/bugs/show_bug.cgi?id=26293
>
> Could you have a look please?
>
> In the meantime, I am going to revert it to check whether this is actually
> the problem.
>
> Thanks,
> -Quentin
>> On Jan 22, 2016, at 10:52 PM, Haicheng Wu via llvm-commits
>> <llvm-commits at lists.llvm.org> wrote:
>>
>> Author: haicheng
>> Date: Sat Jan 23 00:52:41 2016
>> New Revision: 258620
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=258620&view=rev
>> Log:
>> [LIR] Add support for structs and hand unrolled loops
>>
>> Now LIR can turn the following code into memset:
>>
>> typedef struct foo {
>>  int a;
>>  int b;
>> } foo_t;
>>
>> void bar(foo_t *f, unsigned n) {
>>  for (unsigned i = 0; i < n; ++i) {
>>    f[i].a = 0;
>>    f[i].b = 0;
>>  }
>> }
>>
>> void test(int *f, unsigned n) {
>>  for (unsigned i = 0; i < 2 * n; i += 2) {
>>    f[i] = 0;
>>    f[i+1] = 0;
>>  }
>> }
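
For reference, assuming each pair of 4-byte stores covers every byte the loop touches, both functions reduce to a single library call. A minimal sketch of the intended end result for bar() (not the literal pass output; bar_equivalent is a hypothetical name for illustration):

#include <string.h>

typedef struct foo {
  int a;
  int b;
} foo_t;

void bar_equivalent(foo_t *f, unsigned n) {
  /* Each iteration zeroes all 8 bytes of f[i], so the loop is
     equivalent to clearing the whole array at once. */
  memset(f, 0, n * sizeof(foo_t));
}

test() similarly becomes one memset over 2 * n ints.
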
>>
>> Added:
>>    llvm/trunk/test/Transforms/LoopIdiom/struct.ll
>>    llvm/trunk/test/Transforms/LoopIdiom/struct_pattern.ll
>>    llvm/trunk/test/Transforms/LoopIdiom/unroll.ll
>> Modified:
>>    llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
>>    llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
>>    llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
>>    llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
>>
>> Modified: llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h?rev=258620&r1=258619&r2=258620&view=diff
>> ==============================================================================
>> --- llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h (original)
>> +++ llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h Sat Jan 23 00:52:41 2016
>> @@ -659,6 +659,11 @@ const SCEV *replaceSymbolicStrideSCEV(Pr
>> int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
>>                  const ValueToValueMap &StridesMap);
>>
>> +/// \brief Returns true if the memory operations \p A and \p B are consecutive.
>> +/// This is a simple API that does not depend on the analysis pass.
>> +bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
>> +                         ScalarEvolution &SE, bool CheckType = true);
>> +
>> /// \brief This analysis provides dependence information for the memory accesses
>> /// of a loop.
>> ///
>>
>> Modified: llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp?rev=258620&r1=258619&r2=258620&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp (original)
>> +++ llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp Sat Jan 23 00:52:41 2016
>> @@ -901,6 +901,78 @@ int llvm::isStridedPtr(PredicatedScalarE
>>   return Stride;
>> }
>>
>> +/// Take the pointer operand from the Load/Store instruction.
>> +/// Returns NULL if this is not a valid Load/Store instruction.
>> +static Value *getPointerOperand(Value *I) {
>> +  if (LoadInst *LI = dyn_cast<LoadInst>(I))
>> +    return LI->getPointerOperand();
>> +  if (StoreInst *SI = dyn_cast<StoreInst>(I))
>> +    return SI->getPointerOperand();
>> +  return nullptr;
>> +}
>> +
>> +/// Take the address space operand from the Load/Store instruction.
>> +/// Returns -1 if this is not a valid Load/Store instruction.
>> +static unsigned getAddressSpaceOperand(Value *I) {
>> +  if (LoadInst *L = dyn_cast<LoadInst>(I))
>> +    return L->getPointerAddressSpace();
>> +  if (StoreInst *S = dyn_cast<StoreInst>(I))
>> +    return S->getPointerAddressSpace();
>> +  return -1;
>> +}
>> +
>> +/// Returns true if the memory operations \p A and \p B are consecutive.
>> +bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
>> +                               ScalarEvolution &SE, bool CheckType) {
>> +  Value *PtrA = getPointerOperand(A);
>> +  Value *PtrB = getPointerOperand(B);
>> +  unsigned ASA = getAddressSpaceOperand(A);
>> +  unsigned ASB = getAddressSpaceOperand(B);
>> +
>> +  // Check that the address spaces match and that the pointers are valid.
>> +  if (!PtrA || !PtrB || (ASA != ASB))
>> +    return false;
>> +
>> +  // Make sure that A and B are different pointers.
>> +  if (PtrA == PtrB)
>> +    return false;
>> +
>> +  // Make sure that A and B have the same type if required.
>> +  if(CheckType && PtrA->getType() != PtrB->getType())
>> +      return false;
>> +
>> +  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
>> +  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
>> +  APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
>> +
>> +  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
>> +  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
>> +  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
>> +
>> +  //  OffsetDelta = OffsetB - OffsetA;
>> +  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
>> +  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
>> +  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
>> +  const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
>> +  const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
>> +  // Check if they are based on the same pointer. That makes the offsets
>> +  // sufficient.
>> +  if (PtrA == PtrB)
>> +    return OffsetDelta == Size;
>> +
>> +  // Compute the necessary base pointer delta to have the necessary final delta
>> +  // equal to the size.
>> +  // BaseDelta = Size - OffsetDelta;
>> +  const SCEV *SizeSCEV = SE.getConstant(Size);
>> +  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
>> +
>> +  // Otherwise compute the distance with SCEV between the base pointers.
>> +  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
>> +  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
>> +  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
>> +  return X == PtrSCEVB;
>> +}
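
A concrete instance of the check above, using the struct example from the log: the stores to f[i].a and f[i].b have pointer SCEVs {f,+,8} and {f+4,+,8}. Their getelementptrs carry a variable index, so stripping in-bounds constant offsets changes nothing and OffsetDelta = 0; with Size = 4 (a 4-byte store), BaseDelta = 4 - 0 = 4, and SE.getAddExpr(PtrSCEVA, BaseDelta) folds to {f+4,+,8}, which matches PtrSCEVB, so the pair is reported consecutive.
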
>> +
>> bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
>>   switch (Type) {
>>   case NoDep:
>>
>> Modified: llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp?rev=258620&r1=258619&r2=258620&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Scalar/LoopIdiomRecognize.cpp Sat Jan 23 00:52:41 2016
>> @@ -26,22 +26,20 @@
>> // i64 and larger types when i64 is legal and the value has few bits set.  It
>> // would be good to enhance isel to emit a loop for ctpop in this case.
>> //
>> -// We should enhance the memset/memcpy recognition to handle multiple stores in
>> -// the loop.  This would handle things like:
>> -//   void foo(_Complex float *P)
>> -//     for (i) { __real__(*P) = 0;  __imag__(*P) = 0; }
>> -//
>> // This could recognize common matrix multiplies and dot product idioms and
>> // replace them with calls to BLAS (if linked in??).
>> //
>> //===----------------------------------------------------------------------===//
>>
>> #include "llvm/Transforms/Scalar.h"
>> +#include "llvm/ADT/MapVector.h"
>> +#include "llvm/ADT/SetVector.h"
>> #include "llvm/ADT/Statistic.h"
>> #include "llvm/Analysis/AliasAnalysis.h"
>> #include "llvm/Analysis/BasicAliasAnalysis.h"
>> #include "llvm/Analysis/GlobalsModRef.h"
>> #include "llvm/Analysis/LoopPass.h"
>> +#include "llvm/Analysis/LoopAccessAnalysis.h"
>> #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
>> #include "llvm/Analysis/ScalarEvolutionExpander.h"
>> #include "llvm/Analysis/ScalarEvolutionExpressions.h"
>> @@ -108,7 +106,9 @@ public:
>>
>> private:
>>   typedef SmallVector<StoreInst *, 8> StoreList;
>> -  StoreList StoreRefsForMemset;
>> +  typedef MapVector<Value *, StoreList> StoreListMap;
>> +  StoreListMap StoreRefsForMemset;
>> +  StoreListMap StoreRefsForMemsetPattern;
>>   StoreList StoreRefsForMemcpy;
>>   bool HasMemset;
>>   bool HasMemsetPattern;
>> @@ -122,14 +122,18 @@ private:
>>                       SmallVectorImpl<BasicBlock *> &ExitBlocks);
>>
>>   void collectStores(BasicBlock *BB);
>> -  bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy);
>> -  bool processLoopStore(StoreInst *SI, const SCEV *BECount);
>> +  bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemsetPattern,
>> +                    bool &ForMemcpy);
>> +  bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
>> +                         bool ForMemset);
>>   bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
>>
>>   bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
>>                                unsigned StoreAlignment, Value *StoredVal,
>> -                               Instruction *TheStore, const SCEVAddRecExpr *Ev,
>> -                               const SCEV *BECount, bool NegStride);
>> +                               Instruction *TheStore,
>> +                               SmallPtrSetImpl<Instruction *> &Stores,
>> +                               const SCEVAddRecExpr *Ev, const SCEV *BECount,
>> +                               bool NegStride);
>>   bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
>>
>>   /// @}
>> @@ -305,7 +309,7 @@ static Constant *getMemSetPatternValue(V
>> }
>>
>> bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
>> -                                      bool &ForMemcpy) {
>> +                                      bool &ForMemsetPattern, bool &ForMemcpy) {
>>   // Don't touch volatile stores.
>>   if (!SI->isSimple())
>>     return false;
>> @@ -353,7 +357,7 @@ bool LoopIdiomRecognize::isLegalStore(St
>>              StorePtr->getType()->getPointerAddressSpace() == 0 &&
>>              (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
>>     // It looks like we can use PatternValue!
>> -    ForMemset = true;
>> +    ForMemsetPattern = true;
>>     return true;
>>   }
>>
>> @@ -393,6 +397,7 @@ bool LoopIdiomRecognize::isLegalStore(St
>>
>> void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
>>   StoreRefsForMemset.clear();
>> +  StoreRefsForMemsetPattern.clear();
>>   StoreRefsForMemcpy.clear();
>>   for (Instruction &I : *BB) {
>>     StoreInst *SI = dyn_cast<StoreInst>(&I);
>> @@ -400,15 +405,22 @@ void LoopIdiomRecognize::collectStores(B
>>       continue;
>>
>>     bool ForMemset = false;
>> +    bool ForMemsetPattern = false;
>>     bool ForMemcpy = false;
>>     // Make sure this is a strided store with a constant stride.
>> -    if (!isLegalStore(SI, ForMemset, ForMemcpy))
>> +    if (!isLegalStore(SI, ForMemset, ForMemsetPattern, ForMemcpy))
>>       continue;
>>
>>     // Save the store locations.
>> -    if (ForMemset)
>> -      StoreRefsForMemset.push_back(SI);
>> -    else if (ForMemcpy)
>> +    if (ForMemset) {
>> +      // Find the base pointer.
>> +      Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
>> +      StoreRefsForMemset[Ptr].push_back(SI);
>> +    } else if (ForMemsetPattern) {
>> +      // Find the base pointer.
>> +      Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
>> +      StoreRefsForMemsetPattern[Ptr].push_back(SI);
>> +    } else if (ForMemcpy)
>>       StoreRefsForMemcpy.push_back(SI);
>>   }
>> }
>> @@ -430,9 +442,14 @@ bool LoopIdiomRecognize::runOnLoopBlock(
>>   // Look for store instructions, which may be optimized to memset/memcpy.
>>   collectStores(BB);
>>
>> -  // Look for a single store which can be optimized into a memset.
>> -  for (auto &SI : StoreRefsForMemset)
>> -    MadeChange |= processLoopStore(SI, BECount);
>> +  // Look for a single store or sets of stores with a common base, which can be
>> +  // optimized into a memset (memset_pattern).  The latter most commonly happens
>> +  // with structs and handunrolled loops.
>> +  for (auto &SL : StoreRefsForMemset)
>> +    MadeChange |= processLoopStores(SL.second, BECount, true);
>> +
>> +  for (auto &SL : StoreRefsForMemsetPattern)
>> +    MadeChange |= processLoopStores(SL.second, BECount, false);
>>
>>   // Optimize the store into a memcpy, if it feeds an similarly strided load.
>>   for (auto &SI : StoreRefsForMemcpy)
>> @@ -458,26 +475,155 @@ bool LoopIdiomRecognize::runOnLoopBlock(
>>   return MadeChange;
>> }
>>
>> -/// processLoopStore - See if this store can be promoted to a memset.
>> -bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
>> -  assert(SI->isSimple() && "Expected only non-volatile stores.");
>> +/// processLoopStores - See if this store(s) can be promoted to a memset.
>> +bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
>> +                                           const SCEV *BECount,
>> +                                           bool ForMemset) {
>> +  // Try to find consecutive stores that can be transformed into memsets.
>> +  SetVector<StoreInst *> Heads, Tails;
>> +  SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
>> +
>> +  // Do a quadratic search on all of the given stores and find
>> +  // all of the pairs of stores that follow each other.
>> +  SmallVector<unsigned, 16> IndexQueue;
>> +  for (unsigned i = 0, e = SL.size(); i < e; ++i) {
>> +    assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
>> +
>> +    Value *FirstStoredVal = SL[i]->getValueOperand();
>> +    Value *FirstStorePtr = SL[i]->getPointerOperand();
>> +    const SCEVAddRecExpr *FirstStoreEv =
>> +        cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
>> +    unsigned FirstStride = getStoreStride(FirstStoreEv);
>> +    unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL);
>> +
>> +    // See if we can optimize just this store in isolation.
>> +    if (FirstStride == FirstStoreSize || FirstStride == -FirstStoreSize) {
>> +      Heads.insert(SL[i]);
>> +      continue;
>> +    }
>>
>> -  Value *StoredVal = SI->getValueOperand();
>> -  Value *StorePtr = SI->getPointerOperand();
>> +    Value *FirstSplatValue = nullptr;
>> +    Constant *FirstPatternValue = nullptr;
>>
>> -  // Check to see if the stride matches the size of the store.  If so, then we
>> -  // know that every byte is touched in the loop.
>> -  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
>> -  unsigned Stride = getStoreStride(StoreEv);
>> -  unsigned StoreSize = getStoreSizeInBytes(SI, DL);
>> -  if (StoreSize != Stride && StoreSize != -Stride)
>> -    return false;
>> +    if (ForMemset)
>> +      FirstSplatValue = isBytewiseValue(FirstStoredVal);
>> +    else
>> +      FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
>> +
>> +    assert((FirstSplatValue || FirstPatternValue) &&
>> +           "Expected either splat value or pattern value.");
>> +
>> +    IndexQueue.clear();
>> +    // If a store has multiple consecutive store candidates, search Stores
>> +    // array according to the sequence: from i+1 to e, then from i-1 to 0.
>> +    // This is because usually pairing with immediate succeeding or preceding
>> +    // candidate create the best chance to find memset opportunity.
>> +    unsigned j = 0;
>> +    for (j = i + 1; j < e; ++j)
>> +      IndexQueue.push_back(j);
>> +    for (j = i; j > 0; --j)
>> +      IndexQueue.push_back(j - 1);
>> +
>> +    for (auto &k : IndexQueue) {
>> +      assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
>> +      Value *SecondStorePtr = SL[k]->getPointerOperand();
>> +      const SCEVAddRecExpr *SecondStoreEv =
>> +          cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
>> +      unsigned SecondStride = getStoreStride(SecondStoreEv);
>>
>> -  bool NegStride = StoreSize == -Stride;
>> +      if (FirstStride != SecondStride)
>> +        continue;
>> +
>> +      Value *SecondStoredVal = SL[k]->getValueOperand();
>> +      Value *SecondSplatValue = nullptr;
>> +      Constant *SecondPatternValue = nullptr;
>> +
>> +      if (ForMemset)
>> +        SecondSplatValue = isBytewiseValue(SecondStoredVal);
>> +      else
>> +        SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
>> +
>> +      assert((SecondSplatValue || SecondPatternValue) &&
>> +             "Expected either splat value or pattern value.");
>> +
>> +      if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
>> +        if (ForMemset) {
>> +          ConstantInt *C1 = dyn_cast<ConstantInt>(FirstSplatValue);
>> +          ConstantInt *C2 = dyn_cast<ConstantInt>(SecondSplatValue);
>> +          if (!C1 || !C2 || C1 != C2)
>> +            continue;
>> +        } else {
>> +          Constant *C1 = FirstPatternValue;
>> +          Constant *C2 = SecondPatternValue;
>> +
>> +          if (ConstantArray *CA1 = dyn_cast<ConstantArray>(C1))
>> +            C1 = CA1->getSplatValue();
>> +
>> +          if (ConstantArray *CA2 = dyn_cast<ConstantArray>(C2))
>> +            C2 = CA2->getSplatValue();
>> +
>> +          if (C1 != C2)
>> +            continue;
>> +        }
>> +        Tails.insert(SL[k]);
>> +        Heads.insert(SL[i]);
>> +        ConsecutiveChain[SL[i]] = SL[k];
>> +        break;
>> +      }
>> +    }
>> +  }
>> +
>> +  // We may run into multiple chains that merge into a single chain. We mark the
>> +  // stores that we transformed so that we don't visit the same store twice.
>> +  SmallPtrSet<Value *, 16> TransformedStores;
>> +  bool Changed = false;
>> +
>> +  // For stores that start but don't end a link in the chain:
>> +  for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
>> +       it != e; ++it) {
>> +    if (Tails.count(*it))
>> +      continue;
>> +
>> +    // We found a store instr that starts a chain. Now follow the chain and try
>> +    // to transform it.
>> +    SmallPtrSet<Instruction *, 8> AdjacentStores;
>> +    StoreInst *I = *it;
>> +
>> +    StoreInst *HeadStore = I;
>> +    unsigned StoreSize = 0;
>> +
>> +    // Collect the chain into a list.
>> +    while (Tails.count(I) || Heads.count(I)) {
>> +      if (TransformedStores.count(I))
>> +        break;
>> +      AdjacentStores.insert(I);
>> +
>> +      StoreSize += getStoreSizeInBytes(I, DL);
>> +      // Move to the next value in the chain.
>> +      I = ConsecutiveChain[I];
>> +    }
>> +
>> +    Value *StoredVal = HeadStore->getValueOperand();
>> +    Value *StorePtr = HeadStore->getPointerOperand();
>> +    const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
>> +    unsigned Stride = getStoreStride(StoreEv);
>> +
>> +    // Check to see if the stride matches the size of the stores.  If so, then
>> +    // we know that every byte is touched in the loop.
>> +    if (StoreSize != Stride && StoreSize != -Stride)
>> +      continue;
>> +
>> +    bool NegStride = StoreSize == -Stride;
>> +
>> +    if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
>> +                                StoredVal, HeadStore, AdjacentStores, StoreEv,
>> +                                BECount, NegStride)) {
>> +      TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
>> +      Changed = true;
>> +    }
>> +  }
>>
>> -  // See if we can optimize just this store in isolation.
>> -  return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
>> -                                 StoredVal, SI, StoreEv, BECount, NegStride);
>> +  return Changed;
>> }
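
Tracing the struct example through this function: each of the two stores has stride 8 (the size of foo_t) but store size 4, so neither qualifies in isolation; the quadratic search pairs them, recording the f[i].a store as a head and the f[i].b store as a tail with ConsecutiveChain[a] = b. Walking the chain from the head accumulates StoreSize = 4 + 4 = 8, which now matches the stride, and the pair is handed to processLoopStridedStore as a single 8-byte-per-iteration strided store.
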
>>
>> /// processLoopMemSet - See if this memset can be promoted to a large memset.
>> @@ -520,18 +666,21 @@ bool LoopIdiomRecognize::processLoopMemS
>>   if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
>>     return false;
>>
>> +  SmallPtrSet<Instruction *, 1> MSIs;
>> +  MSIs.insert(MSI);
>>   return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
>> -                                 MSI->getAlignment(), SplatValue, MSI, Ev,
>> +                                 MSI->getAlignment(), SplatValue, MSI, MSIs, Ev,
>>                                  BECount, /*NegStride=*/false);
>> }
>>
>> /// mayLoopAccessLocation - Return true if the specified loop might access the
>> /// specified pointer location, which is a loop-strided access.  The 'Access'
>> /// argument specifies what the verboten forms of access are (read or write).
>> -static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
>> -                                  const SCEV *BECount, unsigned StoreSize,
>> -                                  AliasAnalysis &AA,
>> -                                  Instruction *IgnoredStore) {
>> +static bool
>> +mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
>> +                      const SCEV *BECount, unsigned StoreSize,
>> +                      AliasAnalysis &AA,
>> +                      SmallPtrSetImpl<Instruction *> &IgnoredStores) {
>>   // Get the location that may be stored across the loop.  Since the access is
>>   // strided positively through memory, we say that the modified location starts
>>   // at the pointer and has infinite size.
>> @@ -551,7 +700,8 @@ static bool mayLoopAccessLocation(Value
>>   for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
>>        ++BI)
>>     for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
>> -      if (&*I != IgnoredStore && (AA.getModRefInfo(&*I, StoreLoc) & Access))
>> +      if (IgnoredStores.count(&*I) == 0 &&
>> +          (AA.getModRefInfo(&*I, StoreLoc) & Access))
>>         return true;
>>
>>   return false;
>> @@ -574,7 +724,8 @@ static const SCEV *getStartForNegStride(
>> /// transform this into a memset or memset_pattern in the loop preheader, do so.
>> bool LoopIdiomRecognize::processLoopStridedStore(
>>     Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
>> -    Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
>> +    Value *StoredVal, Instruction *TheStore,
>> +    SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
>>     const SCEV *BECount, bool NegStride) {
>>   Value *SplatValue = isBytewiseValue(StoredVal);
>>   Constant *PatternValue = nullptr;
>> @@ -609,7 +760,7 @@ bool LoopIdiomRecognize::processLoopStri
>>   Value *BasePtr =
>>       Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
>>   if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize,
>> -                            *AA, TheStore)) {
>> +                            *AA, Stores)) {
>>     Expander.clear();
>>     // If we generated new code for the base pointer, clean up.
>>     RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
>> @@ -662,7 +813,8 @@ bool LoopIdiomRecognize::processLoopStri
>>
>>   // Okay, the memset has been formed.  Zap the original store and anything that
>>   // feeds into it.
>> -  deleteDeadInstruction(TheStore, TLI);
>> +  for (auto *I : Stores)
>> +    deleteDeadInstruction(I, TLI);
>>   ++NumMemSet;
>>   return true;
>> }
>> @@ -714,8 +866,10 @@ bool LoopIdiomRecognize::processLoopStor
>>   Value *StoreBasePtr = Expander.expandCodeFor(
>>       StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
>>
>> +  SmallPtrSet<Instruction *, 1> Stores;
>> +  Stores.insert(SI);
>>   if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
>> -                            StoreSize, *AA, SI)) {
>> +                            StoreSize, *AA, Stores)) {
>>     Expander.clear();
>>     // If we generated new code for the base pointer, clean up.
>>     RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
>> @@ -735,7 +889,7 @@ bool LoopIdiomRecognize::processLoopStor
>>       LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
>>
>>   if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
>> -                            *AA, SI)) {
>> +                            *AA, Stores)) {
>>     Expander.clear();
>>     // If we generated new code for the base pointer, clean up.
>>     RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
>>
>> Modified: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp?rev=258620&r1=258619&r2=258620&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp Sat Jan 23 00:52:41 2016
>> @@ -26,6 +26,7 @@
>> #include "llvm/Analysis/AssumptionCache.h"
>> #include "llvm/Analysis/CodeMetrics.h"
>> #include "llvm/Analysis/LoopInfo.h"
>> +#include "llvm/Analysis/LoopAccessAnalysis.h"
>> #include "llvm/Analysis/ScalarEvolution.h"
>> #include "llvm/Analysis/ScalarEvolutionExpressions.h"
>> #include "llvm/Analysis/TargetTransformInfo.h"
>> @@ -401,9 +402,6 @@ public:
>>     }
>>   }
>>
>> -  /// \returns true if the memory operations A and B are consecutive.
>> -  bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL);
>> -
>>   /// \brief Perform LICM and CSE on the newly generated gather sequences.
>>   void optimizeGatherSequence();
>>
>> @@ -438,14 +436,6 @@ private:
>>   /// vectorized, or NULL. They may happen in cycles.
>>   Value *alreadyVectorized(ArrayRef<Value *> VL) const;
>>
>> -  /// \brief Take the pointer operand from the Load/Store instruction.
>> -  /// \returns NULL if this is not a valid Load/Store instruction.
>> -  static Value *getPointerOperand(Value *I);
>> -
>> -  /// \brief Take the address space operand from the Load/Store instruction.
>> -  /// \returns -1 if this is not a valid Load/Store instruction.
>> -  static unsigned getAddressSpaceOperand(Value *I);
>> -
>>   /// \returns the scalarization cost for this type. Scalarization in this
>>   /// context means the creation of vectors from a group of scalars.
>>   int getGatherCost(Type *Ty);
>> @@ -1191,8 +1181,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>>           return;
>>         }
>>
>> -        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
>> -          if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
>> +        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL, *SE)) {
>> +          if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL, *SE)) {
>>             ++NumLoadsWantToChangeOrder;
>>           }
>>           BS.cancelScheduling(VL);
>> @@ -1364,7 +1354,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Val
>>       const DataLayout &DL = F->getParent()->getDataLayout();
>>       // Check if the stores are consecutive or of we need to swizzle them.
>>       for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
>> -        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
>> +        if (!isConsecutiveAccess(VL[i], VL[i + 1], DL, *SE)) {
>>           BS.cancelScheduling(VL);
>>           newTreeEntry(VL, false);
>>           DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
>> @@ -1837,63 +1827,6 @@ int BoUpSLP::getGatherCost(ArrayRef<Valu
>>   return getGatherCost(VecTy);
>> }
>>
>> -Value *BoUpSLP::getPointerOperand(Value *I) {
>> -  if (LoadInst *LI = dyn_cast<LoadInst>(I))
>> -    return LI->getPointerOperand();
>> -  if (StoreInst *SI = dyn_cast<StoreInst>(I))
>> -    return SI->getPointerOperand();
>> -  return nullptr;
>> -}
>> -
>> -unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
>> -  if (LoadInst *L = dyn_cast<LoadInst>(I))
>> -    return L->getPointerAddressSpace();
>> -  if (StoreInst *S = dyn_cast<StoreInst>(I))
>> -    return S->getPointerAddressSpace();
>> -  return -1;
>> -}
>> -
>> -bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL) {
>> -  Value *PtrA = getPointerOperand(A);
>> -  Value *PtrB = getPointerOperand(B);
>> -  unsigned ASA = getAddressSpaceOperand(A);
>> -  unsigned ASB = getAddressSpaceOperand(B);
>> -
>> -  // Check that the address spaces match and that the pointers are valid.
>> -  if (!PtrA || !PtrB || (ASA != ASB))
>> -    return false;
>> -
>> -  // Make sure that A and B are different pointers of the same type.
>> -  if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
>> -    return false;
>> -
>> -  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
>> -  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
>> -  APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
>> -
>> -  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
>> -  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
>> -  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
>> -
>> -  APInt OffsetDelta = OffsetB - OffsetA;
>> -
>> -  // Check if they are based on the same pointer. That makes the offsets
>> -  // sufficient.
>> -  if (PtrA == PtrB)
>> -    return OffsetDelta == Size;
>> -
>> -  // Compute the necessary base pointer delta to have the necessary final delta
>> -  // equal to the size.
>> -  APInt BaseDelta = Size - OffsetDelta;
>> -
>> -  // Otherwise compute the distance with SCEV between the base pointers.
>> -  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
>> -  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
>> -  const SCEV *C = SE->getConstant(BaseDelta);
>> -  const SCEV *X = SE->getAddExpr(PtrSCEVA, C);
>> -  return X == PtrSCEVB;
>> -}
>> -
>> // Reorder commutative operations in alternate shuffle if the resulting vectors
>> // are consecutive loads. This would allow us to vectorize the tree.
>> // If we have something like-
>> @@ -1921,10 +1854,10 @@ void BoUpSLP::reorderAltShuffleOperands(
>>       if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
>>         Instruction *VL1 = cast<Instruction>(VL[j]);
>>         Instruction *VL2 = cast<Instruction>(VL[j + 1]);
>> -        if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
>> +        if (isConsecutiveAccess(L, L1, DL, *SE) && VL1->isCommutative()) {
>>           std::swap(Left[j], Right[j]);
>>           continue;
>> -        } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
>> +        } else if (isConsecutiveAccess(L, L1, DL, *SE) && VL2->isCommutative()) {
>>           std::swap(Left[j + 1], Right[j + 1]);
>>           continue;
>>         }
>> @@ -1935,10 +1868,10 @@ void BoUpSLP::reorderAltShuffleOperands(
>>       if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
>>         Instruction *VL1 = cast<Instruction>(VL[j]);
>>         Instruction *VL2 = cast<Instruction>(VL[j + 1]);
>> -        if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
>> +        if (isConsecutiveAccess(L, L1, DL, *SE) && VL1->isCommutative()) {
>>           std::swap(Left[j], Right[j]);
>>           continue;
>> -        } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
>> +        } else if (isConsecutiveAccess(L, L1, DL, *SE) && VL2->isCommutative()) {
>>           std::swap(Left[j + 1], Right[j + 1]);
>>           continue;
>>         }
>> @@ -2088,7 +2021,7 @@ void BoUpSLP::reorderInputsAccordingToOp
>>   for (unsigned j = 0; j < VL.size() - 1; ++j) {
>>     if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
>>       if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
>> -        if (isConsecutiveAccess(L, L1, DL)) {
>> +        if (isConsecutiveAccess(L, L1, DL, *SE)) {
>>           std::swap(Left[j + 1], Right[j + 1]);
>>           continue;
>>         }
>> @@ -2096,7 +2029,7 @@ void BoUpSLP::reorderInputsAccordingToOp
>>     }
>>     if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
>>       if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
>> -        if (isConsecutiveAccess(L, L1, DL)) {
>> +        if (isConsecutiveAccess(L, L1, DL, *SE)) {
>>           std::swap(Left[j + 1], Right[j + 1]);
>>           continue;
>>         }
>> @@ -3461,7 +3394,7 @@ bool SLPVectorizer::vectorizeStores(Arra
>>       IndexQueue.push_back(j - 1);
>>
>>     for (auto &k : IndexQueue) {
>> -      if (R.isConsecutiveAccess(Stores[i], Stores[k], DL)) {
>> +      if (isConsecutiveAccess(Stores[i], Stores[k], DL, *SE)) {
>>         Tails.insert(Stores[k]);
>>         Heads.insert(Stores[i]);
>>         ConsecutiveChain[Stores[i]] = Stores[k];
>>
>> Added: llvm/trunk/test/Transforms/LoopIdiom/struct.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/struct.ll?rev=258620&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/LoopIdiom/struct.ll (added)
>> +++ llvm/trunk/test/Transforms/LoopIdiom/struct.ll Sat Jan 23 00:52:41 2016
>> @@ -0,0 +1,221 @@
>> +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
>> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
>> +
>> +target triple = "x86_64-apple-darwin10.0.0"
>> +
>> +%struct.foo = type { i32, i32 }
>> +%struct.foo1 = type { i32, i32, i32 }
>> +%struct.foo2 = type { i32, i16, i16 }
>> +
>> +;void bar1(foo_t *f, unsigned n) {
>> +;  for (unsigned i = 0; i < n; ++i) {
>> +;    f[i].a = 0;
>> +;    f[i].b = 0;
>> +;  }
>> +;}
>> +define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp {
>> +entry:
>> +  %cmp1 = icmp eq i32 %n, 0
>> +  br i1 %cmp1, label %for.end, label %for.body.preheader
>> +
>> +for.body.preheader:                               ; preds = %entry
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %for.body.preheader, %for.body
>> +  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
>> +  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
>> +  store i32 0, i32* %a, align 4
>> +  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
>> +  store i32 0, i32* %b, align 4
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
>> +  %exitcond = icmp ne i32 %lftr.wideiv, %n
>> +  br i1 %exitcond, label %for.body, label %for.end.loopexit
>> +
>> +for.end.loopexit:                                 ; preds = %for.body
>> +  br label %for.end
>> +
>> +for.end:                                          ; preds = %for.end.loopexit, %entry
>> +  ret void
>> +; CHECK-LABEL: @bar1(
>> +; CHECK: call void @llvm.memset
>> +; CHECK-NOT: store
>> +}
>> +
>> +;void bar2(foo_t *f, unsigned n) {
>> +;  for (unsigned i = 0; i < n; ++i) {
>> +;    f[i].b = 0;
>> +;    f[i].a = 0;
>> +;  }
>> +;}
>> +define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp {
>> +entry:
>> +  %cmp1 = icmp eq i32 %n, 0
>> +  br i1 %cmp1, label %for.end, label %for.body.preheader
>> +
>> +for.body.preheader:                               ; preds = %entry
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %for.body.preheader, %for.body
>> +  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
>> +  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
>> +  store i32 0, i32* %b, align 4
>> +  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
>> +  store i32 0, i32* %a, align 4
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
>> +  %exitcond = icmp ne i32 %lftr.wideiv, %n
>> +  br i1 %exitcond, label %for.body, label %for.end.loopexit
>> +
>> +for.end.loopexit:                                 ; preds = %for.body
>> +  br label %for.end
>> +
>> +for.end:                                          ; preds = %for.end.loopexit, %entry
>> +  ret void
>> +; CHECK-LABEL: @bar2(
>> +; CHECK: call void @llvm.memset
>> +; CHECK-NOT: store
>> +}
>> +
>> +;void bar3(foo_t *f, unsigned n) {
>> +;  for (unsigned i = n; i > 0; --i) {
>> +;    f[i].a = 0;
>> +;    f[i].b = 0;
>> +;  }
>> +;}
>> +define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
>> +entry:
>> +  %cmp1 = icmp eq i32 %n, 0
>> +  br i1 %cmp1, label %for.end, label %for.body.preheader
>> +
>> +for.body.preheader:                               ; preds = %entry
>> +  %0 = zext i32 %n to i64
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %for.body.preheader, %for.body
>> +  %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
>> +  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
>> +  store i32 0, i32* %a, align 4
>> +  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
>> +  store i32 0, i32* %b, align 4
>> +  %1 = trunc i64 %indvars.iv to i32
>> +  %dec = add i32 %1, -1
>> +  %cmp = icmp eq i32 %dec, 0
>> +  %indvars.iv.next = add nsw i64 %indvars.iv, -1
>> +  br i1 %cmp, label %for.end.loopexit, label %for.body
>> +
>> +for.end.loopexit:                                 ; preds = %for.body
>> +  br label %for.end
>> +
>> +for.end:                                          ; preds = %for.end.loopexit, %entry
>> +  ret void
>> +; CHECK-LABEL: @bar3(
>> +; CHECK: call void @llvm.memset
>> +; CHECK-NOT: store
>> +}
>> +
>> +;void bar4(foo_t *f, unsigned n) {
>> +;  for (unsigned i = 0; i < n; ++i) {
>> +;    f[i].a = 0;
>> +;    f[i].b = 1;
>> +;  }
>> +;}
>> +define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
>> +entry:
>> +  %cmp1 = icmp eq i32 %n, 0
>> +  br i1 %cmp1, label %for.end, label %for.body.preheader
>> +
>> +for.body.preheader:                               ; preds = %entry
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %for.body.preheader, %for.body
>> +  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
>> +  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
>> +  store i32 0, i32* %a, align 4
>> +  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
>> +  store i32 1, i32* %b, align 4
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
>> +  %exitcond = icmp ne i32 %lftr.wideiv, %n
>> +  br i1 %exitcond, label %for.body, label %for.end.loopexit
>> +
>> +for.end.loopexit:                                 ; preds = %for.body
>> +  br label %for.end
>> +
>> +for.end:                                          ; preds = %for.end.loopexit, %entry
>> +  ret void
>> +; CHECK-LABEL: @bar4(
>> +; CHECK-NOT: call void @llvm.memset
>> +}
>> +
>> +;void bar5(foo1_t *f, unsigned n) {
>> +;  for (unsigned i = 0; i < n; ++i) {
>> +;    f[i].a = 0;
>> +;    f[i].b = 0;
>> +;  }
>> +;}
>> +define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp {
>> +entry:
>> +  %cmp1 = icmp eq i32 %n, 0
>> +  br i1 %cmp1, label %for.end, label %for.body.preheader
>> +
>> +for.body.preheader:                               ; preds = %entry
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %for.body.preheader, %for.body
>> +  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
>> +  %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
>> +  store i32 0, i32* %a, align 4
>> +  %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
>> +  store i32 0, i32* %b, align 4
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
>> +  %exitcond = icmp ne i32 %lftr.wideiv, %n
>> +  br i1 %exitcond, label %for.body, label %for.end.loopexit
>> +
>> +for.end.loopexit:                                 ; preds = %for.body
>> +  br label %for.end
>> +
>> +for.end:                                          ; preds = %for.end.loopexit, %entry
>> +  ret void
>> +; CHECK-LABEL: @bar5(
>> +; CHECK-NOT: call void @llvm.memset
>> +}
>> +
>> +;void bar6(foo2_t *f, unsigned n) {
>> +;  for (unsigned i = 0; i < n; ++i) {
>> +;    f[i].a = 0;
>> +;    f[i].b = 0;
>> +;    f[i].c = 0;
>> +;  }
>> +;}
>> +define void @bar6(%struct.foo2* nocapture %f, i32 %n) nounwind ssp {
>> +entry:
>> +  %cmp1 = icmp eq i32 %n, 0
>> +  br i1 %cmp1, label %for.end, label %for.body.preheader
>> +
>> +for.body.preheader:                               ; preds = %entry
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %for.body.preheader, %for.body
>> +  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
>> +  %a = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 0
>> +  store i32 0, i32* %a, align 4
>> +  %b = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 1
>> +  store i16 0, i16* %b, align 4
>> +  %c = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 2
>> +  store i16 0, i16* %c, align 2
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
>> +  %exitcond = icmp ne i32 %lftr.wideiv, %n
>> +  br i1 %exitcond, label %for.body, label %for.end.loopexit
>> +
>> +for.end.loopexit:                                 ; preds = %for.body
>> +  br label %for.end
>> +
>> +for.end:                                          ; preds = %for.end.loopexit, %entry
>> +  ret void
>> +; CHECK-LABEL: @bar6(
>> +; CHECK: call void @llvm.memset
>> +; CHECK-NOT: store
>> +}
>>
>> Added: llvm/trunk/test/Transforms/LoopIdiom/struct_pattern.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/struct_pattern.ll?rev=258620&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/LoopIdiom/struct_pattern.ll (added)
>> +++ llvm/trunk/test/Transforms/LoopIdiom/struct_pattern.ll Sat Jan 23 00:52:41 2016
>> @@ -0,0 +1,186 @@
>> +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
>> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
>> +
>> +; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
>> +; CHECK: @.memset_pattern.1 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
>> +; CHECK: @.memset_pattern.2 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
>> +
>> +target triple = "x86_64-apple-darwin10.0.0"
>> +
>> +%struct.foo = type { i32, i32 }
>> +%struct.foo1 = type { i32, i32, i32 }
>> +
>> +;void bar1(foo_t *f, unsigned n) {
>> +;  for (unsigned i = 0; i < n; ++i) {
>> +;    f[i].a = 2;
>> +;    f[i].b = 2;
>> +;  }
>> +;}
>> +define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp {
>> +entry:
>> +  %cmp1 = icmp eq i32 %n, 0
>> +  br i1 %cmp1, label %for.end, label %for.body.preheader
>> +
>> +for.body.preheader:                               ; preds = %entry
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %for.body.preheader, %for.body
>> +  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
>> +  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
>> +  store i32 2, i32* %a, align 4
>> +  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
>> +  store i32 2, i32* %b, align 4
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
>> +  %exitcond = icmp ne i32 %lftr.wideiv, %n
>> +  br i1 %exitcond, label %for.body, label %for.end.loopexit
>> +
>> +for.end.loopexit:                                 ; preds = %for.body
>> +  br label %for.end
>> +
>> +for.end:                                          ; preds = %for.end.loopexit, %entry
>> +  ret void
>> +; CHECK-LABEL: @bar1(
>> +; CHECK: call void @memset_pattern16
>> +; CHECK-NOT: store
>> +}
>> +
>> +;void bar2(foo_t *f, unsigned n) {
>> +;  for (unsigned i = 0; i < n; ++i) {
>> +;    f[i].b = 2;
>> +;    f[i].a = 2;
>> +;  }
>> +;}
>> +define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp {
>> +entry:
>> +  %cmp1 = icmp eq i32 %n, 0
>> +  br i1 %cmp1, label %for.end, label %for.body.preheader
>> +
>> +for.body.preheader:                               ; preds = %entry
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %for.body.preheader, %for.body
>> +  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
>> +  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
>> +  store i32 2, i32* %b, align 4
>> +  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
>> +  store i32 2, i32* %a, align 4
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
>> +  %exitcond = icmp ne i32 %lftr.wideiv, %n
>> +  br i1 %exitcond, label %for.body, label %for.end.loopexit
>> +
>> +for.end.loopexit:                                 ; preds = %for.body
>> +  br label %for.end
>> +
>> +for.end:                                          ; preds = %for.end.loopexit, %entry
>> +  ret void
>> +; CHECK-LABEL: @bar2(
>> +; CHECK: call void @memset_pattern16
>> +; CHECK-NOT: store
>> +}
>> +
>> +;void bar3(foo_t *f, unsigned n) {
>> +;  for (unsigned i = n; i > 0; --i) {
>> +;    f[i].a = 2;
>> +;    f[i].b = 2;
>> +;  }
>> +;}
>> +define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
>> +entry:
>> +  %cmp1 = icmp eq i32 %n, 0
>> +  br i1 %cmp1, label %for.end, label %for.body.preheader
>> +
>> +for.body.preheader:                               ; preds = %entry
>> +  %0 = zext i32 %n to i64
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %for.body.preheader, %for.body
>> +  %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
>> +  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
>> +  store i32 2, i32* %a, align 4
>> +  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
>> +  store i32 2, i32* %b, align 4
>> +  %1 = trunc i64 %indvars.iv to i32
>> +  %dec = add i32 %1, -1
>> +  %cmp = icmp eq i32 %dec, 0
>> +  %indvars.iv.next = add nsw i64 %indvars.iv, -1
>> +  br i1 %cmp, label %for.end.loopexit, label %for.body
>> +
>> +for.end.loopexit:                                 ; preds = %for.body
>> +  br label %for.end
>> +
>> +for.end:                                          ; preds = %for.end.loopexit, %entry
>> +  ret void
>> +; CHECK-LABEL: @bar3(
>> +; CHECK: call void @memset_pattern16
>> +; CHECK-NOT: store
>> +}
>> +
>> +;void bar4(foo_t *f, unsigned n) {
>> +;  for (unsigned i = 0; i < n; ++i) {
>> +;    f[i].a = 0;
>> +;    f[i].b = 1;
>> +;  }
>> +;}
>> +define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
>> +entry:
>> +  %cmp1 = icmp eq i32 %n, 0
>> +  br i1 %cmp1, label %for.end, label %for.body.preheader
>> +
>> +for.body.preheader:                               ; preds = %entry
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %for.body.preheader, %for.body
>> +  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
>> +  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
>> +  store i32 0, i32* %a, align 4
>> +  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
>> +  store i32 1, i32* %b, align 4
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
>> +  %exitcond = icmp ne i32 %lftr.wideiv, %n
>> +  br i1 %exitcond, label %for.body, label %for.end.loopexit
>> +
>> +for.end.loopexit:                                 ; preds = %for.body
>> +  br label %for.end
>> +
>> +for.end:                                          ; preds = %for.end.loopexit, %entry
>> +  ret void
>> +; CHECK-LABEL: @bar4(
>> +; CHECK-NOT: call void @memset_pattern16
>> +}
>> +
>> +;void bar5(foo1_t *f, unsigned n) {
>> +;  for (unsigned i = 0; i < n; ++i) {
>> +;    f[i].a = 1;
>> +;    f[i].b = 1;
>> +;  }
>> +;}
>> +define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp {
>> +entry:
>> +  %cmp1 = icmp eq i32 %n, 0
>> +  br i1 %cmp1, label %for.end, label %for.body.preheader
>> +
>> +for.body.preheader:                               ; preds = %entry
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %for.body.preheader, %for.body
>> +  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
>> +  %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
>> +  store i32 1, i32* %a, align 4
>> +  %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
>> +  store i32 1, i32* %b, align 4
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>> +  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
>> +  %exitcond = icmp ne i32 %lftr.wideiv, %n
>> +  br i1 %exitcond, label %for.body, label %for.end.loopexit
>> +
>> +for.end.loopexit:                                 ; preds = %for.body
>> +  br label %for.end
>> +
>> +for.end:                                          ; preds = %for.end.loopexit, %entry
>> +  ret void
>> +; CHECK-LABEL: @bar5(
>> +; CHECK-NOT: call void @memset_pattern16
>> +}
>>
>> Added: llvm/trunk/test/Transforms/LoopIdiom/unroll.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopIdiom/unroll.ll?rev=258620&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/LoopIdiom/unroll.ll (added)
>> +++ llvm/trunk/test/Transforms/LoopIdiom/unroll.ll Sat Jan 23 00:52:41 2016
>> @@ -0,0 +1,80 @@
>> +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
>> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
>> +
>> +; CHECK @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
>> +
>> +target triple = "x86_64-apple-darwin10.0.0"
>> +
>> +;void test(int *f, unsigned n) {
>> +;  for (unsigned i = 0; i < 2 * n; i += 2) {
>> +;    f[i] = 0;
>> +;    f[i+1] = 0;
>> +;  }
>> +;}
>> +define void @test(i32* %f, i32 %n) nounwind ssp {
>> +entry:
>> +  %mul = shl i32 %n, 1
>> +  %cmp1 = icmp eq i32 %mul, 0
>> +  br i1 %cmp1, label %for.end, label %for.body.preheader
>> +
>> +for.body.preheader:                               ; preds = %entry
>> +  %0 = zext i32 %mul to i64
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %for.body.preheader, %for.body
>> +  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
>> +  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
>> +  store i32 0, i32* %arrayidx, align 4
>> +  %1 = or i64 %indvars.iv, 1
>> +  %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1
>> +  store i32 0, i32* %arrayidx2, align 4
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
>> +  %cmp = icmp ult i64 %indvars.iv.next, %0
>> +  br i1 %cmp, label %for.body, label %for.end.loopexit
>> +
>> +for.end.loopexit:                                 ; preds = %for.body
>> +  br label %for.end
>> +
>> +for.end:                                          ; preds = %for.end.loopexit, %entry
>> +  ret void
>> +; CHECK-LABEL: @test(
>> +; CHECK: call void @llvm.memset
>> +; CHECK-NOT: store
>> +}
>> +
>> +;void test_pattern(int *f, unsigned n) {
>> +;  for (unsigned i = 0; i < 2 * n; i += 2) {
>> +;    f[i] = 2;
>> +;    f[i+1] = 2;
>> +;  }
>> +;}
>> +define void @test_pattern(i32* %f, i32 %n) nounwind ssp {
>> +entry:
>> +  %mul = shl i32 %n, 1
>> +  %cmp1 = icmp eq i32 %mul, 0
>> +  br i1 %cmp1, label %for.end, label %for.body.preheader
>> +
>> +for.body.preheader:                               ; preds = %entry
>> +  %0 = zext i32 %mul to i64
>> +  br label %for.body
>> +
>> +for.body:                                         ; preds = %for.body.preheader, %for.body
>> +  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
>> +  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
>> +  store i32 2, i32* %arrayidx, align 4
>> +  %1 = or i64 %indvars.iv, 1
>> +  %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1
>> +  store i32 2, i32* %arrayidx2, align 4
>> +  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
>> +  %cmp = icmp ult i64 %indvars.iv.next, %0
>> +  br i1 %cmp, label %for.body, label %for.end.loopexit
>> +
>> +for.end.loopexit:                                 ; preds = %for.body
>> +  br label %for.end
>> +
>> +for.end:                                          ; preds = %for.end.loopexit, %entry
>> +  ret void
>> +; CHECK-LABEL: @test_pattern(
>> +; CHECK: call void @memset_pattern16
>> +; CHECK-NOT: store
>> +}
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
>