[llvm] 99203f2 - [Analysis]Add getPointersDiff function to improve compile time.

Richard Smith via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 25 16:29:09 PDT 2021


On Thu, 25 Mar 2021 at 16:24, Richard Smith <richard at metafoo.co.uk> wrote:

> Hi Alexey, this patch has introduced a miscompile for us.
>
> On Tue, 23 Mar 2021 at 14:26, Alexey Bataev via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>>
>> Author: Alexey Bataev
>> Date: 2021-03-23T14:25:36-07:00
>> New Revision: 99203f2004d031f2ef22f01e3c569d2775de1836
>>
>> URL:
>> https://github.com/llvm/llvm-project/commit/99203f2004d031f2ef22f01e3c569d2775de1836
>> DIFF:
>> https://github.com/llvm/llvm-project/commit/99203f2004d031f2ef22f01e3c569d2775de1836.diff
>>
>> LOG: [Analysis]Add getPointersDiff function to improve compile time.
>>
>> Added a getPointersDiff function to LoopAccessAnalysis and used it instead
>> of direct calculation of the distance between pointers and/or the
>> isConsecutiveAccess function in the SLP vectorizer, to improve compile time
>> and the detection of consecutive store chains.
>>
>> Part of D57059
>>
>> Differential Revision: https://reviews.llvm.org/D98967
>>
>> Added:
>>
>>
>> Modified:
>>     llvm/include/llvm/Analysis/LoopAccessAnalysis.h
>>     llvm/lib/Analysis/LoopAccessAnalysis.cpp
>>     llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
>>     llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
>>
>> Removed:
>>
>>
>>
>>
>> ################################################################################
>> diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
>> index 13fbe884eddf..39acfd5bbbee 100644
>> --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
>> +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
>> @@ -679,6 +679,15 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
>>                       const ValueToValueMap &StridesMap = ValueToValueMap(),
>>                       bool Assume = false, bool ShouldCheckWrap = true);
>>
>> +/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
>> +/// compatible and it is possible to calculate the distance between them. This
>> +/// is a simple API that does not depend on the analysis pass.
>> +/// \param StrictCheck Ensure that the calculated distance matches the
>> +/// type-based one after all the bitcasts removal in the provided pointers.
>> +Optional<int> getPointersDiff(Value *PtrA, Value *PtrB, const DataLayout &DL,
>> +                              ScalarEvolution &SE, bool StrictCheck = false,
>> +                              bool CheckType = true);
>> +
>>  /// Attempt to sort the pointers in \p VL and return the sorted indices
>>  /// in \p SortedIndices, if reordering is required.
>>  ///
>>
>> diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
>> index e632fe25c24c..997d4474a448 100644
>> --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
>> +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
>> @@ -1124,139 +1124,123 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
>>    return Stride;
>>  }
>>
>> +Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
>> +                                    const DataLayout &DL, ScalarEvolution &SE,
>> +                                    bool StrictCheck, bool CheckType) {
>> +  assert(PtrA && PtrB && "Expected non-nullptr pointers.");
>> +  // Make sure that A and B are different pointers.
>> +  if (PtrA == PtrB)
>> +    return 0;
>> +
>> +  // Make sure that PtrA and PtrB have the same type if required
>> +  if (CheckType && PtrA->getType() != PtrB->getType())
>> +    return None;
>> +
>> +  unsigned ASA = PtrA->getType()->getPointerAddressSpace();
>> +  unsigned ASB = PtrB->getType()->getPointerAddressSpace();
>> +
>> +  // Check that the address spaces match.
>> +  if (ASA != ASB)
>> +    return None;
>> +  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
>> +
>> +  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
>> +  Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
>> +  Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
>> +
>> +  int Val;
>> +  if (PtrA1 == PtrB1) {
>> +    // Retrieve the address space again as pointer stripping now tracks through
>> +    // `addrspacecast`.
>> +    ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
>> +    ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
>> +    // Check that the address spaces match and that the pointers are valid.
>> +    if (ASA != ASB)
>> +      return None;
>> +
>> +    IdxWidth = DL.getIndexSizeInBits(ASA);
>> +    OffsetA = OffsetA.sextOrTrunc(IdxWidth);
>> +    OffsetB = OffsetB.sextOrTrunc(IdxWidth);
>> +
>> +    OffsetB -= OffsetA;
>> +    Val = OffsetB.getSExtValue();
>> +  } else {
>> +    // Otherwise compute the distance with SCEV between the base pointers.
>> +    const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
>> +    const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
>> +    const auto *Diff =
>> +        dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
>> +    if (!Diff)
>> +      return None;
>> +    Val = Diff->getAPInt().getSExtValue();
>> +  }
>> +  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
>> +  int Size = DL.getTypeStoreSize(Ty);
>> +  int Dist = Val / Size;
>> +
>> +  // Ensure that the calculated distance matches the type-based one after all
>> +  // the bitcasts removal in the provided pointers.
>> +  if (!StrictCheck || Dist * Size == Val)
>> +    return Dist;
>>
>
> Specifically, for the following testcase, we get here with Val == 12 and
> Size == 8, resulting in an incorrect computation of Dist:
>
> ; Function Attrs: nofree norecurse nounwind uwtable
> define void @Int32R2.5(i8* nocapture readnone %retval, i8* noalias nocapture readnone %run_options, i8** noalias nocapture readnone %params, i8** noalias nocapture readonly %buffer_table, i64* noalias nocapture readnone %prof_counters) local_unnamed_addr #0 {
> entry:
>   %0 = getelementptr inbounds i8*, i8** %buffer_table, i64 2
>   %1 = bitcast i8** %0 to i32**
>   %2 = load i32*, i32** %1, align 8, !invariant.load !0, !dereferenceable !1, !align !1
>   %3 = getelementptr inbounds i8*, i8** %buffer_table, i64 3
>   %4 = bitcast i8** %3 to i32**
>   %5 = load i32*, i32** %4, align 8, !invariant.load !0, !dereferenceable !1, !align !1
>   %6 = getelementptr inbounds i8*, i8** %buffer_table, i64 1
>   %7 = bitcast i8** %6 to [2 x [2 x i32]]**
>   %8 = load [2 x [2 x i32]]*, [2 x [2 x i32]]** %7, align 8, !invariant.load !0, !dereferenceable !2, !align !2
>   %9 = load i32, i32* %2, align 4, !invariant.load !0, !noalias !3
>   %10 = icmp sgt i32 %9, 0
>   %11 = load i32, i32* %5, align 4, !invariant.load !0, !noalias !3
>   %12 = icmp sgt i32 %11, 0
>   %13 = select i1 %10, i64 12, i64 0
>   %14 = select i1 %12, i64 4, i64 0
>   %15 = add nuw nsw i64 %13, %14
>   %scevgep6 = getelementptr [36 x i8], [36 x i8]* @0, i64 0, i64 %15
>   %16 = bitcast i8* %scevgep6 to i64*
>   %17 = bitcast [2 x [2 x i32]]* %8 to i64*
>   %18 = load i64, i64* %16, align 4
>   store i64 %18, i64* %17, align 16
>   %scevgep.1 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %8, i64 0, i64 1, i64 0
>   %19 = add nuw nsw i64 %15, 12
>   %scevgep6.1 = getelementptr [36 x i8], [36 x i8]* @0, i64 0, i64 %19
>   %20 = bitcast i8* %scevgep6.1 to i64*
>   %21 = bitcast i32* %scevgep.1 to i64*
>   %22 = load i64, i64* %20, align 4
>   store i64 %22, i64* %21, align 8
>   ret void
> }
>
> ... when computing the difference between %16 and %20 in the call from
> LoopAccessAnalysis.cpp:1207.
>
> Perhaps StrictCheck should always be enabled, given that this function
> truncates its result and produces a wrong answer if not?
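
(To spell out the arithmetic -- this is just the existing division in
getPointersDiff with the values above, not new code:

    int Val = 12;           // byte distance between %16 and %20
    int Size = 8;           // store size of the i64 element type
    int Dist = Val / Size;  // == 1; the 4-byte remainder is silently dropped
    // Dist * Size == 8 != Val, so with StrictCheck enabled the function
    // returns None; without it, we report a distance of 1 element for
    // pointers that are actually 12 bytes apart.

so any caller that leaves StrictCheck off can get a rounded-down element
distance.)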
>
>
>> +  return None;
>> +}
>> +
>>  bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
>>                             ScalarEvolution &SE,
>>                             SmallVectorImpl<unsigned> &SortedIndices) {
>>    assert(llvm::all_of(
>>               VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
>>           "Expected list of pointer operands.");
>> -  SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
>> -  OffValPairs.reserve(VL.size());
>> -
>>    // Walk over the pointers, and map each of them to an offset relative to
>>    // first pointer in the array.
>>    Value *Ptr0 = VL[0];
>> -  const SCEV *Scev0 = SE.getSCEV(Ptr0);
>> -  Value *Obj0 = getUnderlyingObject(Ptr0);
>> -
>> -  llvm::SmallSet<int64_t, 4> Offsets;
>> -  for (auto *Ptr : VL) {
>> -    // TODO: Outline this code as a special, more time consuming, version of
>> -    // computeConstantDifference() function.
>> -    if (Ptr->getType()->getPointerAddressSpace() !=
>> -        Ptr0->getType()->getPointerAddressSpace())
>> -      return false;
>> -    // If a pointer refers to a different underlying object, bail - the
>> -    // pointers are by definition incomparable.
>> -    Value *CurrObj = getUnderlyingObject(Ptr);
>> -    if (CurrObj != Obj0)
>> -      return false;
>>
>> -    const SCEV *Scev = SE.getSCEV(Ptr);
>> -    const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));
>> -    // The pointers may not have a constant offset from each other, or SCEV
>> -    // may just not be smart enough to figure out they do. Regardless,
>> -    // there's nothing we can do.
>> +  using DistOrdPair = std::pair<int64_t, int>;
>> +  auto Compare = [](const DistOrdPair &L, const DistOrdPair &R) {
>> +    return L.first < R.first;
>> +  };
>> +  std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
>> +  Offsets.emplace(0, 0);
>> +  int Cnt = 1;
>> +  bool IsConsecutive = true;
>> +  for (auto *Ptr : VL.drop_front()) {
>> +    Optional<int> Diff = getPointersDiff(Ptr0, Ptr, DL, SE);
>>
>
Passing StrictCheck = true here fixes the miscompile.
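
(Concretely, something like this -- an untested sketch, just the call quoted
above with the StrictCheck argument spelled out:

    Optional<int> Diff = getPointersDiff(Ptr0, Ptr, DL, SE,
                                         /*StrictCheck=*/true);

so that sortPtrAccesses also rejects byte distances that are not an exact
multiple of the element size instead of rounding them down.)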


>>      if (!Diff)
>>        return false;
>>
>>      // Check if the pointer with the same offset is found.
>> -    int64_t Offset = Diff->getAPInt().getSExtValue();
>> -    if (!Offsets.insert(Offset).second)
>> +    int64_t Offset = *Diff;
>> +    auto Res = Offsets.emplace(Offset, Cnt);
>> +    if (!Res.second)
>>        return false;
>> -    OffValPairs.emplace_back(Offset, Ptr);
>> +    // Consecutive order if the inserted element is the last one.
>> +    IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
>> +    ++Cnt;
>>    }
>>    SortedIndices.clear();
>> -  SortedIndices.resize(VL.size());
>> -  std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
>> -
>> -  // Sort the memory accesses and keep the order of their uses in UseOrder.
>> -  llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) {
>> -    return OffValPairs[Left].first < OffValPairs[Right].first;
>> -  });
>> -
>> -  // Check if the order is consecutive already.
>> -  if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
>> -        return I == SortedIndices[I];
>> -      }))
>> -    SortedIndices.clear();
>> -
>> +  if (!IsConsecutive) {
>> +    // Fill SortedIndices array only if it is non-consecutive.
>> +    SortedIndices.resize(VL.size());
>> +    Cnt = 0;
>> +    for (const std::pair<int64_t, int> &Pair : Offsets) {
>> +      IsConsecutive = IsConsecutive && Cnt == Pair.second;
>> +      SortedIndices[Cnt] = Pair.second;
>> +      ++Cnt;
>> +    }
>> +  }
>>    return true;
>>  }
>>
>> -/// Take the address space operand from the Load/Store instruction.
>> -/// Returns -1 if this is not a valid Load/Store instruction.
>> -static unsigned getAddressSpaceOperand(Value *I) {
>> -  if (LoadInst *L = dyn_cast<LoadInst>(I))
>> -    return L->getPointerAddressSpace();
>> -  if (StoreInst *S = dyn_cast<StoreInst>(I))
>> -    return S->getPointerAddressSpace();
>> -  return -1;
>> -}
>> -
>>  /// Returns true if the memory operations \p A and \p B are consecutive.
>>  bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
>>                                 ScalarEvolution &SE, bool CheckType) {
>>    Value *PtrA = getLoadStorePointerOperand(A);
>>    Value *PtrB = getLoadStorePointerOperand(B);
>> -  unsigned ASA = getAddressSpaceOperand(A);
>> -  unsigned ASB = getAddressSpaceOperand(B);
>> -
>> -  // Check that the address spaces match and that the pointers are valid.
>> -  if (!PtrA || !PtrB || (ASA != ASB))
>> -    return false;
>> -
>> -  // Make sure that A and B are different pointers.
>> -  if (PtrA == PtrB)
>> -    return false;
>> -
>> -  // Make sure that A and B have the same type if required.
>> -  if (CheckType && PtrA->getType() != PtrB->getType())
>> +  if (!PtrA || !PtrB)
>>      return false;
>> -
>> -  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
>> -  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
>> -
>> -  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
>> -  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
>> -  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
>> -
>> -  // Retrieve the address space again as pointer stripping now tracks through
>> -  // `addrspacecast`.
>> -  ASA = cast<PointerType>(PtrA->getType())->getAddressSpace();
>> -  ASB = cast<PointerType>(PtrB->getType())->getAddressSpace();
>> -  // Check that the address spaces match and that the pointers are valid.
>> -  if (ASA != ASB)
>> -    return false;
>> -
>> -  IdxWidth = DL.getIndexSizeInBits(ASA);
>> -  OffsetA = OffsetA.sextOrTrunc(IdxWidth);
>> -  OffsetB = OffsetB.sextOrTrunc(IdxWidth);
>> -
>> -  APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
>> -
>> -  //  OffsetDelta = OffsetB - OffsetA;
>> -  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
>> -  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
>> -  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
>> -  const APInt &OffsetDelta = cast<SCEVConstant>(OffsetDeltaSCEV)->getAPInt();
>> -
>> -  // Check if they are based on the same pointer. That makes the offsets
>> -  // sufficient.
>> -  if (PtrA == PtrB)
>> -    return OffsetDelta == Size;
>> -
>> -  // Compute the necessary base pointer delta to have the necessary final delta
>> -  // equal to the size.
>> -  // BaseDelta = Size - OffsetDelta;
>> -  const SCEV *SizeSCEV = SE.getConstant(Size);
>> -  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
>> -
>> -  // Otherwise compute the distance with SCEV between the base pointers.
>> -  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
>> -  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
>> -  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
>> -  return X == PtrSCEVB;
>> +  Optional<int> Diff =
>> +      getPointersDiff(PtrA, PtrB, DL, SE, /*StrictCheck=*/true, CheckType);
>> +  return Diff && *Diff == 1;
>>  }
>>
>>  MemoryDepChecker::VectorizationSafetyStatus
>>
>> diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
>> index 385b6f30dc0f..78d2ea0032db 100644
>> --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
>> +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
>> @@ -941,10 +941,16 @@ class BoUpSLP {
>>                                 ScalarEvolution &SE) {
>>        auto *LI1 = dyn_cast<LoadInst>(V1);
>>        auto *LI2 = dyn_cast<LoadInst>(V2);
>> -      if (LI1 && LI2)
>> -        return isConsecutiveAccess(LI1, LI2, DL, SE)
>> -                   ? VLOperands::ScoreConsecutiveLoads
>> -                   : VLOperands::ScoreFail;
>> +      if (LI1 && LI2) {
>> +        if (LI1->getParent() != LI2->getParent())
>> +          return VLOperands::ScoreFail;
>> +
>> +        Optional<int> Dist =
>> +            getPointersDiff(LI1->getPointerOperand(), LI2->getPointerOperand(),
>> +                            DL, SE, /*StrictCheck=*/true);
>> +        return (Dist && *Dist == 1) ? VLOperands::ScoreConsecutiveLoads
>> +                                    : VLOperands::ScoreFail;
>> +      }
>>
>>        auto *C1 = dyn_cast<Constant>(V1);
>>        auto *C2 = dyn_cast<Constant>(V2);
>> @@ -2871,13 +2877,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
>>            Ptr0 = PointerOps[CurrentOrder.front()];
>>            PtrN = PointerOps[CurrentOrder.back()];
>>          }
>> -        const SCEV *Scev0 = SE->getSCEV(Ptr0);
>> -        const SCEV *ScevN = SE->getSCEV(PtrN);
>> -        const auto *Diff =
>> -            dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));
>> -        uint64_t Size = DL->getTypeAllocSize(ScalarTy);
>> +        Optional<int> Diff = getPointersDiff(Ptr0, PtrN, *DL, *SE);
>>          // Check that the sorted loads are consecutive.
>> -        if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) {
>> +        if (static_cast<unsigned>(*Diff) == VL.size() - 1) {
>>            if (CurrentOrder.empty()) {
>>              // Original loads are consecutive and does not require reordering.
>>              ++NumOpsWantToKeepOriginalOrder;
>> @@ -3150,13 +3152,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
>>            Ptr0 = PointerOps[CurrentOrder.front()];
>>            PtrN = PointerOps[CurrentOrder.back()];
>>          }
>> -        const SCEV *Scev0 = SE->getSCEV(Ptr0);
>> -        const SCEV *ScevN = SE->getSCEV(PtrN);
>> -        const auto *Diff =
>> -            dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));
>> -        uint64_t Size = DL->getTypeAllocSize(ScalarTy);
>> +        Optional<int> Dist = getPointersDiff(Ptr0, PtrN, *DL, *SE);
>>          // Check that the sorted pointer operands are consecutive.
>> -        if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) {
>> +        if (static_cast<unsigned>(*Dist) == VL.size() - 1) {
>>            if (CurrentOrder.empty()) {
>>              // Original stores are consecutive and does not require reordering.
>>              ++NumOpsWantToKeepOriginalOrder;
>> @@ -6107,20 +6105,41 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
>>
>>    int E = Stores.size();
>>    SmallBitVector Tails(E, false);
>> -  SmallVector<int, 16> ConsecutiveChain(E, E + 1);
>>    int MaxIter = MaxStoreLookup.getValue();
>> +  SmallVector<std::pair<int, int>, 16> ConsecutiveChain(
>> +      E, std::make_pair(E, INT_MAX));
>> +  SmallVector<SmallBitVector, 4> CheckedPairs(E, SmallBitVector(E, false));
>>    int IterCnt;
>>    auto &&FindConsecutiveAccess = [this, &Stores, &Tails, &IterCnt, MaxIter,
>> +                                  &CheckedPairs,
>>                                    &ConsecutiveChain](int K, int Idx) {
>>      if (IterCnt >= MaxIter)
>>        return true;
>> +    if (CheckedPairs[Idx].test(K))
>> +      return ConsecutiveChain[K].second == 1 &&
>> +             ConsecutiveChain[K].first == Idx;
>>      ++IterCnt;
>> -    if (!isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE))
>> +    CheckedPairs[Idx].set(K);
>> +    CheckedPairs[K].set(Idx);
>> +    Optional<int> Diff = getPointersDiff(Stores[K]->getPointerOperand(),
>> +                                         Stores[Idx]->getPointerOperand(), *DL,
>> +                                         *SE, /*StrictCheck=*/true);
>> +    if (!Diff || *Diff == 0)
>> +      return false;
>> +    int Val = *Diff;
>> +    if (Val < 0) {
>> +      if (ConsecutiveChain[Idx].second > -Val) {
>> +        Tails.set(K);
>> +        ConsecutiveChain[Idx] = std::make_pair(K, -Val);
>> +      }
>> +      return false;
>> +    }
>> +    if (ConsecutiveChain[K].second <= Val)
>>        return false;
>>
>>      Tails.set(Idx);
>> -    ConsecutiveChain[K] = Idx;
>> -    return true;
>> +    ConsecutiveChain[K] = std::make_pair(Idx, Val);
>> +    return Val == 1;
>>    };
>>    // Do a quadratic search on all of the given stores in reverse order and find
>>    // all of the pairs of stores that follow each other.
>> @@ -6140,17 +6159,31 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
>>    // For stores that start but don't end a link in the chain:
>>    for (int Cnt = E; Cnt > 0; --Cnt) {
>>      int I = Cnt - 1;
>> -    if (ConsecutiveChain[I] == E + 1 || Tails.test(I))
>> +    if (ConsecutiveChain[I].first == E || Tails.test(I))
>>        continue;
>>      // We found a store instr that starts a chain. Now follow the chain and try
>>      // to vectorize it.
>>      BoUpSLP::ValueList Operands;
>>      // Collect the chain into a list.
>> -    while (I != E + 1 && !VectorizedStores.count(Stores[I])) {
>> +    while (I != E && !VectorizedStores.count(Stores[I])) {
>>        Operands.push_back(Stores[I]);
>> +      Tails.set(I);
>> +      if (ConsecutiveChain[I].second != 1) {
>> +        // Mark the new end in the chain and go back, if required. It might be
>> +        // required if the original stores come in reversed order, for example.
>> +        if (ConsecutiveChain[I].first != E &&
>> +            Tails.test(ConsecutiveChain[I].first) &&
>> +            !VectorizedStores.count(Stores[ConsecutiveChain[I].first])) {
>> +          Tails.reset(ConsecutiveChain[I].first);
>> +          if (Cnt < ConsecutiveChain[I].first + 2)
>> +            Cnt = ConsecutiveChain[I].first + 2;
>> +        }
>> +        break;
>> +      }
>>        // Move to the next value in the chain.
>> -      I = ConsecutiveChain[I];
>> +      I = ConsecutiveChain[I].first;
>>      }
>> +    assert(!Operands.empty() && "Expected non-empty list of stores.");
>>
>>      unsigned MaxVecRegSize = R.getMaxVecRegSize();
>>      unsigned EltSize = R.getVectorElementSize(Operands[0]);
>>
>> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
>> index 267cf1a02c29..e28362894910 100644
>> --- a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
>> +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
>> @@ -1,7 +1,7 @@
>>  ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
>> -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+sse2 -S | FileCheck %s --check-prefix=SSE
>> -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+avx  -S | FileCheck %s --check-prefix=AVX
>> -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+avx2 -S | FileCheck %s --check-prefix=AVX
>> +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+sse2 -S | FileCheck %s --check-prefix=SSE
>> +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+avx  -S | FileCheck %s --check-prefix=AVX
>> +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+avx2 -S | FileCheck %s --check-prefix=AVX
>>
>>  %class.1 = type { %class.2 }
>>  %class.2 = type { %"class.3" }
>> @@ -117,13 +117,10 @@ define void @pr35497() local_unnamed_addr #0 {
>>  ; AVX-NEXT:    [[ARRAYIDX2_6:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 0
>>  ; AVX-NEXT:    [[TMP10:%.*]] = bitcast i64* [[ARRAYIDX2_6]] to <2 x i64>*
>>  ; AVX-NEXT:    store <2 x i64> [[TMP4]], <2 x i64>* [[TMP10]], align 1
>> -; AVX-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
>> -; AVX-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP11]], i32 0
>> -; AVX-NEXT:    [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP5]], i32 1
>> -; AVX-NEXT:    [[TMP14:%.*]] = lshr <2 x i64> [[TMP13]], <i64 6, i64 6>
>> -; AVX-NEXT:    [[TMP15:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP14]]
>> -; AVX-NEXT:    [[TMP16:%.*]] = bitcast i64* [[ARRAYIDX2_2]] to <2 x i64>*
>> -; AVX-NEXT:    store <2 x i64> [[TMP15]], <2 x i64>* [[TMP16]], align 1
>> +; AVX-NEXT:    [[TMP11:%.*]] = lshr <2 x i64> [[TMP4]], <i64 6, i64 6>
>> +; AVX-NEXT:    [[TMP12:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP11]]
>> +; AVX-NEXT:    [[TMP13:%.*]] = bitcast i64* [[ARRAYIDX2_2]] to <2 x i64>*
>> +; AVX-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* [[TMP13]], align 1
>>  ; AVX-NEXT:    ret void
>>  ;
>>  entry:
>>
>>
>>
>