[llvm] f1b47ad - Revert "[Analysis]Add getPointersDiff function to improve compile time."

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 23 13:18:06 PDT 2021


Author: Alexey Bataev
Date: 2021-03-23T13:17:54-07:00
New Revision: f1b47ad278b809f5fee0ad7b7fbca5ae376c8fdc

URL: https://github.com/llvm/llvm-project/commit/f1b47ad278b809f5fee0ad7b7fbca5ae376c8fdc
DIFF: https://github.com/llvm/llvm-project/commit/f1b47ad278b809f5fee0ad7b7fbca5ae376c8fdc.diff

LOG: Revert "[Analysis]Add getPointersDiff function to improve compile time."

This reverts commit 065a14a12d2694f26f4e894641f5ab8cfc5da8bd in order to
investigate and fix a crash in the SLP vectorizer.

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/LoopAccessAnalysis.h
    llvm/lib/Analysis/LoopAccessAnalysis.cpp
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 39acfd5bbbeec..13fbe884eddf9 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -679,15 +679,6 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
                      const ValueToValueMap &StridesMap = ValueToValueMap(),
                      bool Assume = false, bool ShouldCheckWrap = true);
 
-/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
-/// compatible and it is possible to calculate the distance between them. This
-/// is a simple API that does not depend on the analysis pass.
-/// \param StrictCheck Ensure that the calculated distance matches the
-/// type-based one after all the bitcasts removal in the provided pointers.
-Optional<int> getPointersDiff(Value *PtrA, Value *PtrB, const DataLayout &DL,
-                              ScalarEvolution &SE, bool StrictCheck = false,
-                              bool CheckType = true);
-
 /// Attempt to sort the pointers in \p VL and return the sorted indices
 /// in \p SortedIndices, if reordering is required.
 ///
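
For context, the removed getPointersDiff computed the element-wise distance
between two pointers. A minimal standalone sketch of that contract, with plain
integers standing in for the DataLayout/SCEV machinery (the names below are
illustrative, not part of the LLVM API):

  #include <cstdint>
  #include <optional>

  // OffsetA/OffsetB: byte offsets of the two pointers from a common base.
  // StoreSize: store size of the pointee type in bytes.
  std::optional<int64_t> pointersDiffSketch(int64_t OffsetA, int64_t OffsetB,
                                            int64_t StoreSize,
                                            bool StrictCheck) {
    int64_t Val = OffsetB - OffsetA; // byte distance between the pointers
    int64_t Dist = Val / StoreSize;  // distance in elements
    // StrictCheck: accept only if the byte distance is an exact multiple of
    // the element size, i.e. the element-based distance round-trips.
    if (!StrictCheck || Dist * StoreSize == Val)
      return Dist;
    return std::nullopt;
  }

  // Example: pointersDiffSketch(0, 8, 8, true) == 1, meaning PtrB is one i64
  // past PtrA: the "consecutive" case the callers below test for.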

diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 997d4474a4489..e632fe25c24c8 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1124,123 +1124,139 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
   return Stride;
 }
 
-Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
-                                    const DataLayout &DL, ScalarEvolution &SE,
-                                    bool StrictCheck, bool CheckType) {
-  assert(PtrA && PtrB && "Expected non-nullptr pointers.");
-  // Make sure that A and B are different pointers.
-  if (PtrA == PtrB)
-    return 0;
-
-  // Make sure that PtrA and PtrB have the same type if required
-  if (CheckType && PtrA->getType() != PtrB->getType())
-    return None;
-
-  unsigned ASA = PtrA->getType()->getPointerAddressSpace();
-  unsigned ASB = PtrB->getType()->getPointerAddressSpace();
-
-  // Check that the address spaces match.
-  if (ASA != ASB)
-    return None;
-  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
-
-  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
-  Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
-  Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
-
-  int Val;
-  if (PtrA1 == PtrB1) {
-    // Retrieve the address space again as pointer stripping now tracks through
-    // `addrspacecast`.
-    ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
-    ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
-    // Check that the address spaces match and that the pointers are valid.
-    if (ASA != ASB)
-      return None;
-
-    IdxWidth = DL.getIndexSizeInBits(ASA);
-    OffsetA = OffsetA.sextOrTrunc(IdxWidth);
-    OffsetB = OffsetB.sextOrTrunc(IdxWidth);
-
-    OffsetB -= OffsetA;
-    Val = OffsetB.getSExtValue();
-  } else {
-    // Otherwise compute the distance with SCEV between the base pointers.
-    const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
-    const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
-    const auto *Diff =
-        dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
-    if (!Diff)
-      return None;
-    Val = Diff->getAPInt().getSExtValue();
-  }
-  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
-  int Size = DL.getTypeStoreSize(Ty);
-  int Dist = Val / Size;
-
-  // Ensure that the calculated distance matches the type-based one after all
-  // the bitcasts removal in the provided pointers.
-  if (!StrictCheck || Dist * Size == Val)
-    return Dist;
-  return None;
-}
-
 bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
                            ScalarEvolution &SE,
                            SmallVectorImpl<unsigned> &SortedIndices) {
   assert(llvm::all_of(
              VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
          "Expected list of pointer operands.");
+  SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
+  OffValPairs.reserve(VL.size());
+
   // Walk over the pointers, and map each of them to an offset relative to
   // first pointer in the array.
   Value *Ptr0 = VL[0];
+  const SCEV *Scev0 = SE.getSCEV(Ptr0);
+  Value *Obj0 = getUnderlyingObject(Ptr0);
+
+  llvm::SmallSet<int64_t, 4> Offsets;
+  for (auto *Ptr : VL) {
+    // TODO: Outline this code as a special, more time consuming, version of
+    // computeConstantDifference() function.
+    if (Ptr->getType()->getPointerAddressSpace() !=
+        Ptr0->getType()->getPointerAddressSpace())
+      return false;
+    // If a pointer refers to a different underlying object, bail - the
+    // pointers are by definition incomparable.
+    Value *CurrObj = getUnderlyingObject(Ptr);
+    if (CurrObj != Obj0)
+      return false;
 
-  using DistOrdPair = std::pair<int64_t, int>;
-  auto Compare = [](const DistOrdPair &L, const DistOrdPair &R) {
-    return L.first < R.first;
-  };
-  std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
-  Offsets.emplace(0, 0);
-  int Cnt = 1;
-  bool IsConsecutive = true;
-  for (auto *Ptr : VL.drop_front()) {
-    Optional<int> Diff = getPointersDiff(Ptr0, Ptr, DL, SE);
+    const SCEV *Scev = SE.getSCEV(Ptr);
+    const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));
+    // The pointers may not have a constant offset from each other, or SCEV
+    // may just not be smart enough to figure out they do. Regardless,
+    // there's nothing we can do.
     if (!Diff)
       return false;
 
     // Check if the pointer with the same offset is found.
-    int64_t Offset = *Diff;
-    auto Res = Offsets.emplace(Offset, Cnt);
-    if (!Res.second)
+    int64_t Offset = Diff->getAPInt().getSExtValue();
+    if (!Offsets.insert(Offset).second)
       return false;
-    // Consecutive order if the inserted element is the last one.
-    IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
-    ++Cnt;
+    OffValPairs.emplace_back(Offset, Ptr);
   }
   SortedIndices.clear();
-  if (!IsConsecutive) {
-    // Fill SortedIndices array only if it is non-consecutive.
-    SortedIndices.resize(VL.size());
-    Cnt = 0;
-    for (const std::pair<int64_t, int> &Pair : Offsets) {
-      IsConsecutive = IsConsecutive && Cnt == Pair.second;
-      SortedIndices[Cnt] = Pair.second;
-      ++Cnt;
-    }
-  }
+  SortedIndices.resize(VL.size());
+  std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
+
+  // Sort the memory accesses and keep the order of their uses in UseOrder.
+  llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) {
+    return OffValPairs[Left].first < OffValPairs[Right].first;
+  });
+
+  // Check if the order is consecutive already.
+  if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
+        return I == SortedIndices[I];
+      }))
+    SortedIndices.clear();
+
   return true;
 }
 
+/// Take the address space operand from the Load/Store instruction.
+/// Returns -1 if this is not a valid Load/Store instruction.
+static unsigned getAddressSpaceOperand(Value *I) {
+  if (LoadInst *L = dyn_cast<LoadInst>(I))
+    return L->getPointerAddressSpace();
+  if (StoreInst *S = dyn_cast<StoreInst>(I))
+    return S->getPointerAddressSpace();
+  return -1;
+}
+
 /// Returns true if the memory operations \p A and \p B are consecutive.
 bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                                ScalarEvolution &SE, bool CheckType) {
   Value *PtrA = getLoadStorePointerOperand(A);
   Value *PtrB = getLoadStorePointerOperand(B);
-  if (!PtrA || !PtrB)
+  unsigned ASA = getAddressSpaceOperand(A);
+  unsigned ASB = getAddressSpaceOperand(B);
+
+  // Check that the address spaces match and that the pointers are valid.
+  if (!PtrA || !PtrB || (ASA != ASB))
+    return false;
+
+  // Make sure that A and B are different pointers.
+  if (PtrA == PtrB)
+    return false;
+
+  // Make sure that A and B have the same type if required.
+  if (CheckType && PtrA->getType() != PtrB->getType())
     return false;
-  Optional<int> Diff =
-      getPointersDiff(PtrA, PtrB, DL, SE, /*StrictCheck=*/true, CheckType);
-  return Diff && *Diff == 1;
+
+  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
+  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+
+  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
+  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
+  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
+
+  // Retrieve the address space again as pointer stripping now tracks through
+  // `addrspacecast`.
+  ASA = cast<PointerType>(PtrA->getType())->getAddressSpace();
+  ASB = cast<PointerType>(PtrB->getType())->getAddressSpace();
+  // Check that the address spaces match and that the pointers are valid.
+  if (ASA != ASB)
+    return false;
+
+  IdxWidth = DL.getIndexSizeInBits(ASA);
+  OffsetA = OffsetA.sextOrTrunc(IdxWidth);
+  OffsetB = OffsetB.sextOrTrunc(IdxWidth);
+
+  APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
+
+  //  OffsetDelta = OffsetB - OffsetA;
+  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
+  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
+  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
+  const APInt &OffsetDelta = cast<SCEVConstant>(OffsetDeltaSCEV)->getAPInt();
+
+  // Check if they are based on the same pointer. That makes the offsets
+  // sufficient.
+  if (PtrA == PtrB)
+    return OffsetDelta == Size;
+
+  // Compute the necessary base pointer delta to have the necessary final delta
+  // equal to the size.
+  // BaseDelta = Size - OffsetDelta;
+  const SCEV *SizeSCEV = SE.getConstant(Size);
+  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
+
+  // Otherwise compute the distance with SCEV between the base pointers.
+  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
+  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
+  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
+  return X == PtrSCEVB;
 }
 
 MemoryDepChecker::VectorizationSafetyStatus
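
The restored isConsecutiveAccess encodes the identity sketched in the comments
above: after stripping constant in-bounds offsets, B immediately follows A iff
SCEV(PtrB) == SCEV(PtrA) + BaseDelta, with BaseDelta = Size - (OffsetB - OffsetA).
A standalone sketch of that arithmetic with plain integers in place of SCEV
expressions (illustrative names, not the LLVM API):

  #include <cstdint>

  // BaseA/BaseB stand in for the SCEVs of the stripped base pointers;
  // OffsetA/OffsetB are the accumulated constant byte offsets; Size is the
  // store size of the accessed type in bytes.
  bool isConsecutiveSketch(int64_t BaseA, int64_t OffsetA, int64_t BaseB,
                           int64_t OffsetB, int64_t Size) {
    int64_t OffsetDelta = OffsetB - OffsetA;
    // Same base pointer: the offsets alone decide.
    if (BaseA == BaseB)
      return OffsetDelta == Size;
    // Otherwise require BaseB == BaseA + (Size - OffsetDelta), so that the
    // full addresses differ by exactly Size:
    // (BaseB + OffsetB) - (BaseA + OffsetA) == Size.
    return BaseB == BaseA + (Size - OffsetDelta);
  }

  // Example: two i64 accesses at p and p+8 (Size == 8) share a base and have
  // OffsetDelta == 8, so they are consecutive.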

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 33e70c5e8965e..385b6f30dc0fc 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -941,16 +941,10 @@ class BoUpSLP {
                                ScalarEvolution &SE) {
       auto *LI1 = dyn_cast<LoadInst>(V1);
       auto *LI2 = dyn_cast<LoadInst>(V2);
-      if (LI1 && LI2) {
-        if (LI1->getParent() != LI2->getParent())
-          return VLOperands::ScoreFail;
-
-        Optional<int> Dist =
-            getPointersDiff(LI1->getPointerOperand(), LI2->getPointerOperand(),
-                            DL, SE, /*StrictCheck=*/true);
-        return (Dist && *Dist == 1) ? VLOperands::ScoreConsecutiveLoads
-                                    : VLOperands::ScoreFail;
-      }
+      if (LI1 && LI2)
+        return isConsecutiveAccess(LI1, LI2, DL, SE)
+                   ? VLOperands::ScoreConsecutiveLoads
+                   : VLOperands::ScoreFail;
 
       auto *C1 = dyn_cast<Constant>(V1);
       auto *C2 = dyn_cast<Constant>(V2);
@@ -2877,9 +2871,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           Ptr0 = PointerOps[CurrentOrder.front()];
           PtrN = PointerOps[CurrentOrder.back()];
         }
-        Optional<int> Diff = getPointersDiff(Ptr0, PtrN, *DL, *SE);
+        const SCEV *Scev0 = SE->getSCEV(Ptr0);
+        const SCEV *ScevN = SE->getSCEV(PtrN);
+        const auto *Diff =
+            dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));
+        uint64_t Size = DL->getTypeAllocSize(ScalarTy);
         // Check that the sorted loads are consecutive.
-        if (static_cast<unsigned>(*Diff) == VL.size() - 1) {
+        if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) {
           if (CurrentOrder.empty()) {
             // Original loads are consecutive and does not require reordering.
             ++NumOpsWantToKeepOriginalOrder;
@@ -3152,9 +3150,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           Ptr0 = PointerOps[CurrentOrder.front()];
           PtrN = PointerOps[CurrentOrder.back()];
         }
-        Optional<int> Dist = getPointersDiff(Ptr0, PtrN, *DL, *SE);
+        const SCEV *Scev0 = SE->getSCEV(Ptr0);
+        const SCEV *ScevN = SE->getSCEV(PtrN);
+        const auto *Diff =
+            dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));
+        uint64_t Size = DL->getTypeAllocSize(ScalarTy);
         // Check that the sorted pointer operands are consecutive.
-        if (static_cast<unsigned>(*Dist) == VL.size() - 1) {
+        if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) {
           if (CurrentOrder.empty()) {
             // Original stores are consecutive and does not require reordering.
             ++NumOpsWantToKeepOriginalOrder;
@@ -6105,41 +6107,20 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
 
   int E = Stores.size();
   SmallBitVector Tails(E, false);
+  SmallVector<int, 16> ConsecutiveChain(E, E + 1);
   int MaxIter = MaxStoreLookup.getValue();
-  SmallVector<std::pair<int, int>, 16> ConsecutiveChain(
-      E, std::make_pair(E, INT_MAX));
-  SmallVector<SmallBitVector, 4> CheckedPairs(E, SmallBitVector(E, false));
   int IterCnt;
   auto &&FindConsecutiveAccess = [this, &Stores, &Tails, &IterCnt, MaxIter,
-                                  &CheckedPairs,
                                   &ConsecutiveChain](int K, int Idx) {
     if (IterCnt >= MaxIter)
       return true;
-    if (CheckedPairs[Idx].test(K))
-      return ConsecutiveChain[K].second == 1 &&
-             ConsecutiveChain[K].first == Idx;
     ++IterCnt;
-    CheckedPairs[Idx].set(K);
-    CheckedPairs[K].set(Idx);
-    Optional<int> Diff = getPointersDiff(Stores[K]->getPointerOperand(),
-                                         Stores[Idx]->getPointerOperand(), *DL,
-                                         *SE, /*StrictCheck=*/true);
-    if (!Diff || *Diff == 0)
-      return false;
-    int Val = *Diff;
-    if (Val < 0) {
-      if (ConsecutiveChain[Idx].second > -Val) {
-        Tails.set(K);
-        ConsecutiveChain[Idx] = std::make_pair(K, -Val);
-      }
-      return false;
-    }
-    if (ConsecutiveChain[K].second <= Val)
+    if (!isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE))
       return false;
 
     Tails.set(Idx);
-    ConsecutiveChain[K] = std::make_pair(Idx, Val);
-    return Val == 1;
+    ConsecutiveChain[K] = Idx;
+    return true;
   };
   // Do a quadratic search on all of the given stores in reverse order and find
   // all of the pairs of stores that follow each other.
@@ -6159,28 +6140,16 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
   // For stores that start but don't end a link in the chain:
   for (int Cnt = E; Cnt > 0; --Cnt) {
     int I = Cnt - 1;
-    if (ConsecutiveChain[I].first == E || Tails.test(I))
+    if (ConsecutiveChain[I] == E + 1 || Tails.test(I))
       continue;
     // We found a store instr that starts a chain. Now follow the chain and try
     // to vectorize it.
     BoUpSLP::ValueList Operands;
     // Collect the chain into a list.
-    while (I != E && !VectorizedStores.count(Stores[I])) {
+    while (I != E + 1 && !VectorizedStores.count(Stores[I])) {
       Operands.push_back(Stores[I]);
-      Tails.set(I);
-      if (ConsecutiveChain[I].second != 1) {
-        // Mark the new end in the chain and go back, if required. It might be
-        // required if the original stores comes in reversed order, for example.
-        if (ConsecutiveChain[I].first != E &&
-            Tails.test(ConsecutiveChain[I].first)) {
-          Tails.reset(ConsecutiveChain[I].first);
-          if (Cnt < ConsecutiveChain[I].first + 2)
-            Cnt = ConsecutiveChain[I].first + 2;
-        }
-        break;
-      }
       // Move to the next value in the chain.
-      I = ConsecutiveChain[I].first;
+      I = ConsecutiveChain[I];
     }
 
     unsigned MaxVecRegSize = R.getMaxVecRegSize();
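
After the revert, the store-chain bookkeeping in vectorizeStores is a plain
successor map again: ConsecutiveChain[K] holds the index of the store that
immediately follows store K, or the sentinel E + 1 when none is known, and each
chain is collected by walking those links. A minimal sketch of that walk
(illustrative, not the LLVM API):

  #include <vector>

  // Chain[I] == Sentinel means store I has no known consecutive successor.
  // Collects the indices of the chain starting at Head, in order.
  std::vector<int> collectChainSketch(const std::vector<int> &Chain, int Head,
                                      int Sentinel) {
    std::vector<int> Ops;
    for (int I = Head; I != Sentinel; I = Chain[I])
      Ops.push_back(I); // mirrors Operands.push_back(Stores[I]) above
    return Ops;
  }

  // Example: with three stores and Chain == {1, 2, Sentinel}, starting at 0
  // yields {0, 1, 2}: one consecutive run of three stores.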

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
index e283628949107..267cf1a02c298 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+sse2 -S | FileCheck %s --check-prefix=SSE
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+avx  -S | FileCheck %s --check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+avx2 -S | FileCheck %s --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+sse2 -S | FileCheck %s --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+avx  -S | FileCheck %s --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+avx2 -S | FileCheck %s --check-prefix=AVX
 
 %class.1 = type { %class.2 }
 %class.2 = type { %"class.3" }
@@ -117,10 +117,13 @@ define void @pr35497() local_unnamed_addr #0 {
 ; AVX-NEXT:    [[ARRAYIDX2_6:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 0
 ; AVX-NEXT:    [[TMP10:%.*]] = bitcast i64* [[ARRAYIDX2_6]] to <2 x i64>*
 ; AVX-NEXT:    store <2 x i64> [[TMP4]], <2 x i64>* [[TMP10]], align 1
-; AVX-NEXT:    [[TMP11:%.*]] = lshr <2 x i64> [[TMP4]], <i64 6, i64 6>
-; AVX-NEXT:    [[TMP12:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP11]]
-; AVX-NEXT:    [[TMP13:%.*]] = bitcast i64* [[ARRAYIDX2_2]] to <2 x i64>*
-; AVX-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* [[TMP13]], align 1
+; AVX-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
+; AVX-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP11]], i32 0
+; AVX-NEXT:    [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP5]], i32 1
+; AVX-NEXT:    [[TMP14:%.*]] = lshr <2 x i64> [[TMP13]], <i64 6, i64 6>
+; AVX-NEXT:    [[TMP15:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP14]]
+; AVX-NEXT:    [[TMP16:%.*]] = bitcast i64* [[ARRAYIDX2_2]] to <2 x i64>*
+; AVX-NEXT:    store <2 x i64> [[TMP15]], <2 x i64>* [[TMP16]], align 1
 ; AVX-NEXT:    ret void
 ;
 entry:


        

