<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<body dir="auto">
Hi Richard, yes, that’s what I thought. Can you commit this fix?<br>
<br>
<div dir="ltr">Best regards,
<div>Alexey Bataev</div>
</div>
<div dir="ltr"><br>
<blockquote type="cite">25 марта 2021 г., в 19:29, Richard Smith <richard@metafoo.co.uk> написал(а):<br>
<br>
</blockquote>
</div>
<blockquote type="cite">
<div dir="ltr">
<div dir="ltr">
<div dir="ltr">On Thu, 25 Mar 2021 at 16:24, Richard Smith <<a href="mailto:richard@metafoo.co.uk">richard@metafoo.co.uk</a>> wrote:<br>
</div>
<div class="gmail_quote">
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<div dir="ltr">
<div dir="ltr">Hi Alexey, this patch has introduced a miscompile for us.</div>
<br>
<div class="gmail_quote">
<div dir="ltr" class="gmail_attr">On Tue, 23 Mar 2021 at 14:26, Alexey Bataev via llvm-commits <<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>> wrote:<br>
</div>
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<br>
Author: Alexey Bataev<br>
Date: 2021-03-23T14:25:36-07:00<br>
New Revision: 99203f2004d031f2ef22f01e3c569d2775de1836<br>
<br>
URL: <a href="https://github.com/llvm/llvm-project/commit/99203f2004d031f2ef22f01e3c569d2775de1836" rel="noreferrer" target="_blank">
https://github.com/llvm/llvm-project/commit/99203f2004d031f2ef22f01e3c569d2775de1836</a><br>
DIFF: <a href="https://github.com/llvm/llvm-project/commit/99203f2004d031f2ef22f01e3c569d2775de1836.diff" rel="noreferrer" target="_blank">
https://github.com/llvm/llvm-project/commit/99203f2004d031f2ef22f01e3c569d2775de1836.diff</a><br>
<br>
LOG: [Analysis]Add getPointersDiff function to improve compile time.<br>
<br>
Added getPointersDiff function to LoopAccessAnalysis and used it instead<br>
direct calculatoin of the distance between pointers and/or<br>
isConsecutiveAccess function in SLP vectorizer to improve compile time<br>
and detection of stores consecutive chains.<br>
<br>
Part of D57059<br>
<br>
Differential Revision: <a href="https://reviews.llvm.org/D98967" rel="noreferrer" target="_blank">
https://reviews.llvm.org/D98967</a><br>
<br>
Added: <br>
<br>
<br>
Modified: <br>
llvm/include/llvm/Analysis/LoopAccessAnalysis.h<br>
llvm/lib/Analysis/LoopAccessAnalysis.cpp<br>
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>
llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll<br>
<br>
Removed: <br>
<br>
<br>
<br>
################################################################################<br>
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h<br>
index 13fbe884eddf..39acfd5bbbee 100644<br>
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h<br>
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h<br>
@@ -679,6 +679,15 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,<br>
const ValueToValueMap &StridesMap = ValueToValueMap(),<br>
bool Assume = false, bool ShouldCheckWrap = true);<br>
<br>
+/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are<br>
+/// compatible and it is possible to calculate the distance between them. This<br>
+/// is a simple API that does not depend on the analysis pass.<br>
+/// \param StrictCheck Ensure that the calculated distance matches the<br>
+/// type-based one after all the bitcasts removal in the provided pointers.<br>
+Optional<int> getPointersDiff(Value *PtrA, Value *PtrB, const DataLayout &DL,<br>
+ ScalarEvolution &SE, bool StrictCheck = false,<br>
+ bool CheckType = true);<br>
+<br>
/// Attempt to sort the pointers in \p VL and return the sorted indices<br>
/// in \p SortedIndices, if reordering is required.<br>
///<br>
<br>
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp<br>
index e632fe25c24c..997d4474a448 100644<br>
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp<br>
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp<br>
@@ -1124,139 +1124,123 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,<br>
return Stride;<br>
}<br>
<br>
+Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,<br>
+ const DataLayout &DL, ScalarEvolution &SE,<br>
+ bool StrictCheck, bool CheckType) {<br>
+ assert(PtrA && PtrB && "Expected non-nullptr pointers.");<br>
+ // Make sure that A and B are <br>
diff erent pointers.<br>
+ if (PtrA == PtrB)<br>
+ return 0;<br>
+<br>
+ // Make sure that PtrA and PtrB have the same type if required<br>
+ if (CheckType && PtrA->getType() != PtrB->getType())<br>
+ return None;<br>
+<br>
+ unsigned ASA = PtrA->getType()->getPointerAddressSpace();<br>
+ unsigned ASB = PtrB->getType()->getPointerAddressSpace();<br>
+<br>
+ // Check that the address spaces match.<br>
+ if (ASA != ASB)<br>
+ return None;<br>
+ unsigned IdxWidth = DL.getIndexSizeInBits(ASA);<br>
+<br>
+ APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);<br>
+ Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);<br>
+ Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);<br>
+<br>
+ int Val;<br>
+ if (PtrA1 == PtrB1) {<br>
+ // Retrieve the address space again as pointer stripping now tracks through<br>
+ // `addrspacecast`.<br>
+ ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();<br>
+ ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();<br>
+ // Check that the address spaces match and that the pointers are valid.<br>
+ if (ASA != ASB)<br>
+ return None;<br>
+<br>
+ IdxWidth = DL.getIndexSizeInBits(ASA);<br>
+ OffsetA = OffsetA.sextOrTrunc(IdxWidth);<br>
+ OffsetB = OffsetB.sextOrTrunc(IdxWidth);<br>
+<br>
+ OffsetB -= OffsetA;<br>
+ Val = OffsetB.getSExtValue();<br>
+ } else {<br>
+ // Otherwise compute the distance with SCEV between the base pointers.<br>
+ const SCEV *PtrSCEVA = SE.getSCEV(PtrA);<br>
+ const SCEV *PtrSCEVB = SE.getSCEV(PtrB);<br>
+ const auto *Diff =<br>
+ dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));<br>
+ if (!Diff)<br>
+ return None;<br>
+ Val = Diff->getAPInt().getSExtValue();<br>
+ }<br>
+ Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();<br>
+ int Size = DL.getTypeStoreSize(Ty);<br>
+ int Dist = Val / Size;<br>
+<br>
+ // Ensure that the calculated distance matches the type-based one after all<br>
+ // the bitcasts removal in the provided pointers.<br>
+ if (!StrictCheck || Dist * Size == Val)<br>
+ return Dist;<br>
</blockquote>
<div><br>
</div>
<div>Specifically, we get here, with Val == 12 and Size == 8, resulting in an incorrect computation of Dist, for the following testcase:</div>
<div><br>
</div>
<div>; Function Attrs: nofree norecurse nounwind uwtable<br>
define void @Int32R2.5(i8* nocapture readnone %retval, i8* noalias nocapture readnone %run_options, i8** noalias nocapture readnone %params, i8** noalias nocapture readonly %buffer_table, i64* noalias nocapture readnone %prof_counters) local_unnamed_addr #0
{<br>
entry:<br>
%0 = getelementptr inbounds i8*, i8** %buffer_table, i64 2<br>
%1 = bitcast i8** %0 to i32**<br>
%2 = load i32*, i32** %1, align 8, !invariant.load !0, !dereferenceable !1, !align !1<br>
%3 = getelementptr inbounds i8*, i8** %buffer_table, i64 3<br>
%4 = bitcast i8** %3 to i32**<br>
%5 = load i32*, i32** %4, align 8, !invariant.load !0, !dereferenceable !1, !align !1<br>
%6 = getelementptr inbounds i8*, i8** %buffer_table, i64 1<br>
%7 = bitcast i8** %6 to [2 x [2 x i32]]**<br>
%8 = load [2 x [2 x i32]]*, [2 x [2 x i32]]** %7, align 8, !invariant.load !0, !dereferenceable !2, !align !2<br>
%9 = load i32, i32* %2, align 4, !invariant.load !0, !noalias !3<br>
%10 = icmp sgt i32 %9, 0<br>
%11 = load i32, i32* %5, align 4, !invariant.load !0, !noalias !3<br>
%12 = icmp sgt i32 %11, 0<br>
%13 = select i1 %10, i64 12, i64 0<br>
%14 = select i1 %12, i64 4, i64 0<br>
%15 = add nuw nsw i64 %13, %14<br>
%scevgep6 = getelementptr [36 x i8], [36 x i8]* @0, i64 0, i64 %15<br>
%16 = bitcast i8* %scevgep6 to i64*<br>
%17 = bitcast [2 x [2 x i32]]* %8 to i64*<br>
%18 = load i64, i64* %16, align 4<br>
store i64 %18, i64* %17, align 16<br>
%scevgep.1 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %8, i64 0, i64 1, i64 0<br>
%19 = add nuw nsw i64 %15, 12<br>
%scevgep6.1 = getelementptr [36 x i8], [36 x i8]* @0, i64 0, i64 %19<br>
%20 = bitcast i8* %scevgep6.1 to i64*<br>
%21 = bitcast i32* %scevgep.1 to i64*<br>
%22 = load i64, i64* %20, align 4<br>
store i64 %22, i64* %21, align 8<br>
ret void<br>
}<br>
</div>
<div><br>
</div>
<div>... when computing the difference between %16 and %20 in the call from LoopAccessAnalysis.cpp:1207.</div>
<div><br>
</div>
<div>Perhaps StrictCheck should always be enabled, given that this function truncates its result and produces a wrong answer if not?</div>
<div> </div>
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ return None;<br>
+}<br>
+<br>
bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,<br>
ScalarEvolution &SE,<br>
SmallVectorImpl<unsigned> &SortedIndices) {<br>
assert(llvm::all_of(<br>
VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&<br>
"Expected list of pointer operands.");<br>
- SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;<br>
- OffValPairs.reserve(VL.size());<br>
-<br>
// Walk over the pointers, and map each of them to an offset relative to<br>
// first pointer in the array.<br>
Value *Ptr0 = VL[0];<br>
- const SCEV *Scev0 = SE.getSCEV(Ptr0);<br>
- Value *Obj0 = getUnderlyingObject(Ptr0);<br>
-<br>
- llvm::SmallSet<int64_t, 4> Offsets;<br>
- for (auto *Ptr : VL) {<br>
- // TODO: Outline this code as a special, more time consuming, version of<br>
- // computeConstantDifference() function.<br>
- if (Ptr->getType()->getPointerAddressSpace() !=<br>
- Ptr0->getType()->getPointerAddressSpace())<br>
- return false;<br>
- // If a pointer refers to a <br>
diff erent underlying object, bail - the<br>
- // pointers are by definition incomparable.<br>
- Value *CurrObj = getUnderlyingObject(Ptr);<br>
- if (CurrObj != Obj0)<br>
- return false;<br>
<br>
- const SCEV *Scev = SE.getSCEV(Ptr);<br>
- const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));<br>
- // The pointers may not have a constant offset from each other, or SCEV<br>
- // may just not be smart enough to figure out they do. Regardless,<br>
- // there's nothing we can do.<br>
+ using DistOrdPair = std::pair<int64_t, int>;<br>
+ auto Compare = [](const DistOrdPair &L, const DistOrdPair &R) {<br>
+ return L.first < R.first;<br>
+ };<br>
+ std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);<br>
+ Offsets.emplace(0, 0);<br>
+ int Cnt = 1;<br>
+ bool IsConsecutive = true;<br>
+ for (auto *Ptr : VL.drop_front()) {<br>
+ Optional<int> Diff = getPointersDiff(Ptr0, Ptr, DL, SE);<br>
</blockquote>
</div>
</div>
</blockquote>
<div><br>
</div>
<div>Passing StrictCheck = true here fixes the miscompile.</div>
<div> </div>
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<div dir="ltr">
<div class="gmail_quote">
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
if (!Diff)<br>
return false;<br>
<br>
// Check if the pointer with the same offset is found.<br>
- int64_t Offset = Diff->getAPInt().getSExtValue();<br>
- if (!Offsets.insert(Offset).second)<br>
+ int64_t Offset = *Diff;<br>
+ auto Res = Offsets.emplace(Offset, Cnt);<br>
+ if (!Res.second)<br>
return false;<br>
- OffValPairs.emplace_back(Offset, Ptr);<br>
+ // Consecutive order if the inserted element is the last one.<br>
+ IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();<br>
+ ++Cnt;<br>
}<br>
SortedIndices.clear();<br>
- SortedIndices.resize(VL.size());<br>
- std::iota(SortedIndices.begin(), SortedIndices.end(), 0);<br>
-<br>
- // Sort the memory accesses and keep the order of their uses in UseOrder.<br>
- llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) {<br>
- return OffValPairs[Left].first < OffValPairs[Right].first;<br>
- });<br>
-<br>
- // Check if the order is consecutive already.<br>
- if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {<br>
- return I == SortedIndices[I];<br>
- }))<br>
- SortedIndices.clear();<br>
-<br>
+ if (!IsConsecutive) {<br>
+ // Fill SortedIndices array only if it is non-consecutive.<br>
+ SortedIndices.resize(VL.size());<br>
+ Cnt = 0;<br>
+ for (const std::pair<int64_t, int> &Pair : Offsets) {<br>
+ IsConsecutive = IsConsecutive && Cnt == Pair.second;<br>
+ SortedIndices[Cnt] = Pair.second;<br>
+ ++Cnt;<br>
+ }<br>
+ }<br>
return true;<br>
}<br>
<br>
-/// Take the address space operand from the Load/Store instruction.<br>
-/// Returns -1 if this is not a valid Load/Store instruction.<br>
-static unsigned getAddressSpaceOperand(Value *I) {<br>
- if (LoadInst *L = dyn_cast<LoadInst>(I))<br>
- return L->getPointerAddressSpace();<br>
- if (StoreInst *S = dyn_cast<StoreInst>(I))<br>
- return S->getPointerAddressSpace();<br>
- return -1;<br>
-}<br>
-<br>
/// Returns true if the memory operations \p A and \p B are consecutive.<br>
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,<br>
ScalarEvolution &SE, bool CheckType) {<br>
Value *PtrA = getLoadStorePointerOperand(A);<br>
Value *PtrB = getLoadStorePointerOperand(B);<br>
- unsigned ASA = getAddressSpaceOperand(A);<br>
- unsigned ASB = getAddressSpaceOperand(B);<br>
-<br>
- // Check that the address spaces match and that the pointers are valid.<br>
- if (!PtrA || !PtrB || (ASA != ASB))<br>
- return false;<br>
-<br>
- // Make sure that A and B are <br>
diff erent pointers.<br>
- if (PtrA == PtrB)<br>
- return false;<br>
-<br>
- // Make sure that A and B have the same type if required.<br>
- if (CheckType && PtrA->getType() != PtrB->getType())<br>
+ if (!PtrA || !PtrB)<br>
return false;<br>
-<br>
- unsigned IdxWidth = DL.getIndexSizeInBits(ASA);<br>
- Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();<br>
-<br>
- APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);<br>
- PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);<br>
- PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);<br>
-<br>
- // Retrieve the address space again as pointer stripping now tracks through<br>
- // `addrspacecast`.<br>
- ASA = cast<PointerType>(PtrA->getType())->getAddressSpace();<br>
- ASB = cast<PointerType>(PtrB->getType())->getAddressSpace();<br>
- // Check that the address spaces match and that the pointers are valid.<br>
- if (ASA != ASB)<br>
- return false;<br>
-<br>
- IdxWidth = DL.getIndexSizeInBits(ASA);<br>
- OffsetA = OffsetA.sextOrTrunc(IdxWidth);<br>
- OffsetB = OffsetB.sextOrTrunc(IdxWidth);<br>
-<br>
- APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));<br>
-<br>
- // OffsetDelta = OffsetB - OffsetA;<br>
- const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);<br>
- const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);<br>
- const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);<br>
- const APInt &OffsetDelta = cast<SCEVConstant>(OffsetDeltaSCEV)->getAPInt();<br>
-<br>
- // Check if they are based on the same pointer. That makes the offsets<br>
- // sufficient.<br>
- if (PtrA == PtrB)<br>
- return OffsetDelta == Size;<br>
-<br>
- // Compute the necessary base pointer delta to have the necessary final delta<br>
- // equal to the size.<br>
- // BaseDelta = Size - OffsetDelta;<br>
- const SCEV *SizeSCEV = SE.getConstant(Size);<br>
- const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);<br>
-<br>
- // Otherwise compute the distance with SCEV between the base pointers.<br>
- const SCEV *PtrSCEVA = SE.getSCEV(PtrA);<br>
- const SCEV *PtrSCEVB = SE.getSCEV(PtrB);<br>
- const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);<br>
- return X == PtrSCEVB;<br>
+ Optional<int> Diff =<br>
+ getPointersDiff(PtrA, PtrB, DL, SE, /*StrictCheck=*/true, CheckType);<br>
+ return Diff && *Diff == 1;<br>
}<br>
<br>
MemoryDepChecker::VectorizationSafetyStatus<br>
<br>
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>
index 385b6f30dc0f..78d2ea0032db 100644<br>
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp<br>
@@ -941,10 +941,16 @@ class BoUpSLP {<br>
ScalarEvolution &SE) {<br>
auto *LI1 = dyn_cast<LoadInst>(V1);<br>
auto *LI2 = dyn_cast<LoadInst>(V2);<br>
- if (LI1 && LI2)<br>
- return isConsecutiveAccess(LI1, LI2, DL, SE)<br>
- ? VLOperands::ScoreConsecutiveLoads<br>
- : VLOperands::ScoreFail;<br>
+ if (LI1 && LI2) {<br>
+ if (LI1->getParent() != LI2->getParent())<br>
+ return VLOperands::ScoreFail;<br>
+<br>
+ Optional<int> Dist =<br>
+ getPointersDiff(LI1->getPointerOperand(), LI2->getPointerOperand(),<br>
+ DL, SE, /*StrictCheck=*/true);<br>
+ return (Dist && *Dist == 1) ? VLOperands::ScoreConsecutiveLoads<br>
+ : VLOperands::ScoreFail;<br>
+ }<br>
<br>
auto *C1 = dyn_cast<Constant>(V1);<br>
auto *C2 = dyn_cast<Constant>(V2);<br>
@@ -2871,13 +2877,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,<br>
Ptr0 = PointerOps[CurrentOrder.front()];<br>
PtrN = PointerOps[CurrentOrder.back()];<br>
}<br>
- const SCEV *Scev0 = SE->getSCEV(Ptr0);<br>
- const SCEV *ScevN = SE->getSCEV(PtrN);<br>
- const auto *Diff =<br>
- dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));<br>
- uint64_t Size = DL->getTypeAllocSize(ScalarTy);<br>
+ Optional<int> Diff = getPointersDiff(Ptr0, PtrN, *DL, *SE);<br>
// Check that the sorted loads are consecutive.<br>
- if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) {<br>
+ if (static_cast<unsigned>(*Diff) == VL.size() - 1) {<br>
if (CurrentOrder.empty()) {<br>
// Original loads are consecutive and does not require reordering.<br>
++NumOpsWantToKeepOriginalOrder;<br>
@@ -3150,13 +3152,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,<br>
Ptr0 = PointerOps[CurrentOrder.front()];<br>
PtrN = PointerOps[CurrentOrder.back()];<br>
}<br>
- const SCEV *Scev0 = SE->getSCEV(Ptr0);<br>
- const SCEV *ScevN = SE->getSCEV(PtrN);<br>
- const auto *Diff =<br>
- dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));<br>
- uint64_t Size = DL->getTypeAllocSize(ScalarTy);<br>
+ Optional<int> Dist = getPointersDiff(Ptr0, PtrN, *DL, *SE);<br>
// Check that the sorted pointer operands are consecutive.<br>
- if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) {<br>
+ if (static_cast<unsigned>(*Dist) == VL.size() - 1) {<br>
if (CurrentOrder.empty()) {<br>
// Original stores are consecutive and does not require reordering.<br>
++NumOpsWantToKeepOriginalOrder;<br>
@@ -6107,20 +6105,41 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,<br>
<br>
int E = Stores.size();<br>
SmallBitVector Tails(E, false);<br>
- SmallVector<int, 16> ConsecutiveChain(E, E + 1);<br>
int MaxIter = MaxStoreLookup.getValue();<br>
+ SmallVector<std::pair<int, int>, 16> ConsecutiveChain(<br>
+ E, std::make_pair(E, INT_MAX));<br>
+ SmallVector<SmallBitVector, 4> CheckedPairs(E, SmallBitVector(E, false));<br>
int IterCnt;<br>
auto &&FindConsecutiveAccess = [this, &Stores, &Tails, &IterCnt, MaxIter,<br>
+ &CheckedPairs,<br>
&ConsecutiveChain](int K, int Idx) {<br>
if (IterCnt >= MaxIter)<br>
return true;<br>
+ if (CheckedPairs[Idx].test(K))<br>
+ return ConsecutiveChain[K].second == 1 &&<br>
+ ConsecutiveChain[K].first == Idx;<br>
++IterCnt;<br>
- if (!isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE))<br>
+ CheckedPairs[Idx].set(K);<br>
+ CheckedPairs[K].set(Idx);<br>
+ Optional<int> Diff = getPointersDiff(Stores[K]->getPointerOperand(),<br>
+ Stores[Idx]->getPointerOperand(), *DL,<br>
+ *SE, /*StrictCheck=*/true);<br>
+ if (!Diff || *Diff == 0)<br>
+ return false;<br>
+ int Val = *Diff;<br>
+ if (Val < 0) {<br>
+ if (ConsecutiveChain[Idx].second > -Val) {<br>
+ Tails.set(K);<br>
+ ConsecutiveChain[Idx] = std::make_pair(K, -Val);<br>
+ }<br>
+ return false;<br>
+ }<br>
+ if (ConsecutiveChain[K].second <= Val)<br>
return false;<br>
<br>
Tails.set(Idx);<br>
- ConsecutiveChain[K] = Idx;<br>
- return true;<br>
+ ConsecutiveChain[K] = std::make_pair(Idx, Val);<br>
+ return Val == 1;<br>
};<br>
// Do a quadratic search on all of the given stores in reverse order and find<br>
// all of the pairs of stores that follow each other.<br>
@@ -6140,17 +6159,31 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,<br>
// For stores that start but don't end a link in the chain:<br>
for (int Cnt = E; Cnt > 0; --Cnt) {<br>
int I = Cnt - 1;<br>
- if (ConsecutiveChain[I] == E + 1 || Tails.test(I))<br>
+ if (ConsecutiveChain[I].first == E || Tails.test(I))<br>
continue;<br>
// We found a store instr that starts a chain. Now follow the chain and try<br>
// to vectorize it.<br>
BoUpSLP::ValueList Operands;<br>
// Collect the chain into a list.<br>
- while (I != E + 1 && !VectorizedStores.count(Stores[I])) {<br>
+ while (I != E && !VectorizedStores.count(Stores[I])) {<br>
Operands.push_back(Stores[I]);<br>
+ Tails.set(I);<br>
+ if (ConsecutiveChain[I].second != 1) {<br>
+ // Mark the new end in the chain and go back, if required. It might be<br>
+ // required if the original stores come in reversed order, for example.<br>
+ if (ConsecutiveChain[I].first != E &&<br>
+ Tails.test(ConsecutiveChain[I].first) &&<br>
+ !VectorizedStores.count(Stores[ConsecutiveChain[I].first])) {<br>
+ Tails.reset(ConsecutiveChain[I].first);<br>
+ if (Cnt < ConsecutiveChain[I].first + 2)<br>
+ Cnt = ConsecutiveChain[I].first + 2;<br>
+ }<br>
+ break;<br>
+ }<br>
// Move to the next value in the chain.<br>
- I = ConsecutiveChain[I];<br>
+ I = ConsecutiveChain[I].first;<br>
}<br>
+ assert(!Operands.empty() && "Expected non-empty list of stores.");<br>
<br>
unsigned MaxVecRegSize = R.getMaxVecRegSize();<br>
unsigned EltSize = R.getVectorElementSize(Operands[0]);<br>
<br>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll<br>
index 267cf1a02c29..e28362894910 100644<br>
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll<br>
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll<br>
@@ -1,7 +1,7 @@<br>
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py<br>
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+sse2 -S | FileCheck %s --check-prefix=SSE<br>
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+avx -S | FileCheck %s --check-prefix=AVX<br>
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+avx2 -S | FileCheck %s --check-prefix=AVX<br>
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+sse2 -S | FileCheck %s --check-prefix=SSE<br>
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+avx -S | FileCheck %s --check-prefix=AVX<br>
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+avx2 -S | FileCheck %s --check-prefix=AVX<br>
<br>
%class.1 = type { %class.2 }<br>
%class.2 = type { %"class.3" }<br>
@@ -117,13 +117,10 @@ define void @pr35497() local_unnamed_addr #0 {<br>
; AVX-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 0<br>
; AVX-NEXT: [[TMP10:%.*]] = bitcast i64* [[ARRAYIDX2_6]] to <2 x i64>*<br>
; AVX-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP10]], align 1<br>
-; AVX-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0<br>
-; AVX-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP11]], i32 0<br>
-; AVX-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP5]], i32 1<br>
-; AVX-NEXT: [[TMP14:%.*]] = lshr <2 x i64> [[TMP13]], <i64 6, i64 6><br>
-; AVX-NEXT: [[TMP15:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP14]]<br>
-; AVX-NEXT: [[TMP16:%.*]] = bitcast i64* [[ARRAYIDX2_2]] to <2 x i64>*<br>
-; AVX-NEXT: store <2 x i64> [[TMP15]], <2 x i64>* [[TMP16]], align 1<br>
+; AVX-NEXT: [[TMP11:%.*]] = lshr <2 x i64> [[TMP4]], <i64 6, i64 6><br>
+; AVX-NEXT: [[TMP12:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP11]]<br>
+; AVX-NEXT: [[TMP13:%.*]] = bitcast i64* [[ARRAYIDX2_2]] to <2 x i64>*<br>
+; AVX-NEXT: store <2 x i64> [[TMP12]], <2 x i64>* [[TMP13]], align 1<br>
; AVX-NEXT: ret void<br>
;<br>
entry:<br>
<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a><br>
<a href="https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>
</blockquote>
</div>
</div>
</blockquote>
</div>
</div>
</div>
</blockquote>
</body>
</html>