[llvm] 99203f2 - [Analysis]Add getPointersDiff function to improve compile time.
Richard Smith via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 25 16:24:45 PDT 2021
Hi Alexey, this patch has introduced a miscompile for us.
On Tue, 23 Mar 2021 at 14:26, Alexey Bataev via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
>
> Author: Alexey Bataev
> Date: 2021-03-23T14:25:36-07:00
> New Revision: 99203f2004d031f2ef22f01e3c569d2775de1836
>
> URL:
> https://github.com/llvm/llvm-project/commit/99203f2004d031f2ef22f01e3c569d2775de1836
> DIFF:
> https://github.com/llvm/llvm-project/commit/99203f2004d031f2ef22f01e3c569d2775de1836.diff
>
> LOG: [Analysis]Add getPointersDiff function to improve compile time.
>
> Added a getPointersDiff function to LoopAccessAnalysis and used it instead
> of direct calculation of the distance between pointers and/or the
> isConsecutiveAccess function in the SLP vectorizer, to improve compile time
> and the detection of consecutive store chains.
>
> Part of D57059
>
> Differential Revision: https://reviews.llvm.org/D98967
>
> Added:
>
>
> Modified:
> llvm/include/llvm/Analysis/LoopAccessAnalysis.h
> llvm/lib/Analysis/LoopAccessAnalysis.cpp
> llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
> llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
>
> Removed:
>
>
>
>
> ################################################################################
> diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
> index 13fbe884eddf..39acfd5bbbee 100644
> --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
> +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
> @@ -679,6 +679,15 @@ int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
>                       const ValueToValueMap &StridesMap = ValueToValueMap(),
>                       bool Assume = false, bool ShouldCheckWrap = true);
>
> +/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
> +/// compatible and it is possible to calculate the distance between them. This
> +/// is a simple API that does not depend on the analysis pass.
> +/// \param StrictCheck Ensure that the calculated distance matches the
> +/// type-based one after all the bitcasts removal in the provided pointers.
> +Optional<int> getPointersDiff(Value *PtrA, Value *PtrB, const DataLayout &DL,
> +                              ScalarEvolution &SE, bool StrictCheck = false,
> +                              bool CheckType = true);
> +
> /// Attempt to sort the pointers in \p VL and return the sorted indices
> /// in \p SortedIndices, if reordering is required.
> ///
>
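As a quick orientation for readers, here is a minimal sketch of how a caller
might use the new API (illustrative only, not code from the patch; it assumes
Value *PtrA, *PtrB, a DataLayout DL, and a ScalarEvolution SE are in scope):

  // Hypothetical caller: is PtrB exactly one element past PtrA?
  Optional<int> Diff = getPointersDiff(PtrA, PtrB, DL, SE,
                                       /*StrictCheck=*/true);
  bool Consecutive = Diff && *Diff == 1;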
> diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
> index e632fe25c24c..997d4474a448 100644
> --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
> +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
> @@ -1124,139 +1124,123 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
>    return Stride;
>  }
>
> +Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
> +                                    const DataLayout &DL, ScalarEvolution &SE,
> +                                    bool StrictCheck, bool CheckType) {
> +  assert(PtrA && PtrB && "Expected non-nullptr pointers.");
> +  // Make sure that A and B are different pointers.
> +  if (PtrA == PtrB)
> +    return 0;
> +
> +  // Make sure that PtrA and PtrB have the same type if required
> +  if (CheckType && PtrA->getType() != PtrB->getType())
> +    return None;
> +
> +  unsigned ASA = PtrA->getType()->getPointerAddressSpace();
> +  unsigned ASB = PtrB->getType()->getPointerAddressSpace();
> +
> +  // Check that the address spaces match.
> +  if (ASA != ASB)
> +    return None;
> +  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
> +
> +  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
> +  Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
> +  Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
> +
> +  int Val;
> +  if (PtrA1 == PtrB1) {
> +    // Retrieve the address space again as pointer stripping now tracks
> +    // through `addrspacecast`.
> +    ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
> +    ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
> +    // Check that the address spaces match and that the pointers are valid.
> +    if (ASA != ASB)
> +      return None;
> +
> +    IdxWidth = DL.getIndexSizeInBits(ASA);
> +    OffsetA = OffsetA.sextOrTrunc(IdxWidth);
> +    OffsetB = OffsetB.sextOrTrunc(IdxWidth);
> +
> +    OffsetB -= OffsetA;
> +    Val = OffsetB.getSExtValue();
> +  } else {
> +    // Otherwise compute the distance with SCEV between the base pointers.
> +    const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
> +    const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
> +    const auto *Diff =
> +        dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
> +    if (!Diff)
> +      return None;
> +    Val = Diff->getAPInt().getSExtValue();
> +  }
> +  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
> +  int Size = DL.getTypeStoreSize(Ty);
> +  int Dist = Val / Size;
> +
> +  // Ensure that the calculated distance matches the type-based one after all
> +  // the bitcasts removal in the provided pointers.
> +  if (!StrictCheck || Dist * Size == Val)
> +    return Dist;
>
Specifically, for the following testcase we get here with Val == 12 and
Size == 8, resulting in an incorrect computation of Dist:
; Function Attrs: nofree norecurse nounwind uwtable
define void @Int32R2.5(i8* nocapture readnone %retval, i8* noalias nocapture readnone %run_options, i8** noalias nocapture readnone %params, i8** noalias nocapture readonly %buffer_table, i64* noalias nocapture readnone %prof_counters) local_unnamed_addr #0 {
entry:
  %0 = getelementptr inbounds i8*, i8** %buffer_table, i64 2
  %1 = bitcast i8** %0 to i32**
  %2 = load i32*, i32** %1, align 8, !invariant.load !0, !dereferenceable !1, !align !1
  %3 = getelementptr inbounds i8*, i8** %buffer_table, i64 3
  %4 = bitcast i8** %3 to i32**
  %5 = load i32*, i32** %4, align 8, !invariant.load !0, !dereferenceable !1, !align !1
  %6 = getelementptr inbounds i8*, i8** %buffer_table, i64 1
  %7 = bitcast i8** %6 to [2 x [2 x i32]]**
  %8 = load [2 x [2 x i32]]*, [2 x [2 x i32]]** %7, align 8, !invariant.load !0, !dereferenceable !2, !align !2
  %9 = load i32, i32* %2, align 4, !invariant.load !0, !noalias !3
  %10 = icmp sgt i32 %9, 0
  %11 = load i32, i32* %5, align 4, !invariant.load !0, !noalias !3
  %12 = icmp sgt i32 %11, 0
  %13 = select i1 %10, i64 12, i64 0
  %14 = select i1 %12, i64 4, i64 0
  %15 = add nuw nsw i64 %13, %14
  %scevgep6 = getelementptr [36 x i8], [36 x i8]* @0, i64 0, i64 %15
  %16 = bitcast i8* %scevgep6 to i64*
  %17 = bitcast [2 x [2 x i32]]* %8 to i64*
  %18 = load i64, i64* %16, align 4
  store i64 %18, i64* %17, align 16
  %scevgep.1 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %8, i64 0, i64 1, i64 0
  %19 = add nuw nsw i64 %15, 12
  %scevgep6.1 = getelementptr [36 x i8], [36 x i8]* @0, i64 0, i64 %19
  %20 = bitcast i8* %scevgep6.1 to i64*
  %21 = bitcast i32* %scevgep.1 to i64*
  %22 = load i64, i64* %20, align 4
  store i64 %22, i64* %21, align 8
  ret void
}
... when computing the difference between %16 and %20 in the call from
LoopAccessAnalysis.cpp:1207 (presumably the sortPtrAccesses call quoted
below, which uses the default StrictCheck=false).

Perhaps StrictCheck should always be enabled, given that this function
truncates its result and produces a wrong answer if not?
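To spell out the arithmetic, here is a standalone sketch of the failure mode
(the values are the ones we observed; the code is illustrative, not the patch
itself):

  #include <cstdio>

  int main() {
    int Val = 12;  // byte distance between %16 and %20, computed via SCEV
    int Size = 8;  // DL.getTypeStoreSize(i64)
    int Dist = Val / Size;             // truncates to 1: 12 is not a multiple of 8
    bool Exact = (Dist * Size == Val); // false: StrictCheck would return None
    std::printf("Dist = %d, exact = %d\n", Dist, Exact);
    return 0;
  }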
> +  return None;
> +}
> +
>  bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
>                             ScalarEvolution &SE,
>                             SmallVectorImpl<unsigned> &SortedIndices) {
>    assert(llvm::all_of(
>               VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
>           "Expected list of pointer operands.");
> -  SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
> -  OffValPairs.reserve(VL.size());
> -
>    // Walk over the pointers, and map each of them to an offset relative to
>    // first pointer in the array.
>    Value *Ptr0 = VL[0];
> -  const SCEV *Scev0 = SE.getSCEV(Ptr0);
> -  Value *Obj0 = getUnderlyingObject(Ptr0);
> -
> -  llvm::SmallSet<int64_t, 4> Offsets;
> -  for (auto *Ptr : VL) {
> -    // TODO: Outline this code as a special, more time consuming, version of
> -    // computeConstantDifference() function.
> -    if (Ptr->getType()->getPointerAddressSpace() !=
> -        Ptr0->getType()->getPointerAddressSpace())
> -      return false;
> -    // If a pointer refers to a different underlying object, bail - the
> -    // pointers are by definition incomparable.
> -    Value *CurrObj = getUnderlyingObject(Ptr);
> -    if (CurrObj != Obj0)
> -      return false;
>
> -    const SCEV *Scev = SE.getSCEV(Ptr);
> -    const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));
> -    // The pointers may not have a constant offset from each other, or SCEV
> -    // may just not be smart enough to figure out they do. Regardless,
> -    // there's nothing we can do.
> +  using DistOrdPair = std::pair<int64_t, int>;
> +  auto Compare = [](const DistOrdPair &L, const DistOrdPair &R) {
> +    return L.first < R.first;
> +  };
> +  std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
> +  Offsets.emplace(0, 0);
> +  int Cnt = 1;
> +  bool IsConsecutive = true;
> +  for (auto *Ptr : VL.drop_front()) {
> +    Optional<int> Diff = getPointersDiff(Ptr0, Ptr, DL, SE);
>      if (!Diff)
>        return false;
>
>      // Check if the pointer with the same offset is found.
> -    int64_t Offset = Diff->getAPInt().getSExtValue();
> -    if (!Offsets.insert(Offset).second)
> +    int64_t Offset = *Diff;
> +    auto Res = Offsets.emplace(Offset, Cnt);
> +    if (!Res.second)
>        return false;
> -    OffValPairs.emplace_back(Offset, Ptr);
> +    // Consecutive order if the inserted element is the last one.
> +    IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
> +    ++Cnt;
>    }
>    SortedIndices.clear();
> -  SortedIndices.resize(VL.size());
> -  std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
> -
> -  // Sort the memory accesses and keep the order of their uses in UseOrder.
> -  llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) {
> -    return OffValPairs[Left].first < OffValPairs[Right].first;
> -  });
> -
> -  // Check if the order is consecutive already.
> -  if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
> -        return I == SortedIndices[I];
> -      }))
> -    SortedIndices.clear();
> -
> +  if (!IsConsecutive) {
> +    // Fill SortedIndices array only if it is non-consecutive.
> +    SortedIndices.resize(VL.size());
> +    Cnt = 0;
> +    for (const std::pair<int64_t, int> &Pair : Offsets) {
> +      IsConsecutive = IsConsecutive && Cnt == Pair.second;
> +      SortedIndices[Cnt] = Pair.second;
> +      ++Cnt;
> +    }
> +  }
>    return true;
>  }
>
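Incidentally, the ordering trick in the new sortPtrAccesses is easier to see
in isolation. A standalone sketch, with made-up offsets (not code from the
patch):

  #include <cstdio>
  #include <iterator>
  #include <set>
  #include <vector>

  int main() {
    // Hypothetical element offsets of four pointers, in program order.
    std::vector<long> Offsets = {0, 4, 8, 2};
    std::set<long> Sorted;
    bool IsConsecutive = true;
    for (long Off : Offsets) {
      auto Res = Sorted.insert(Off);
      if (!Res.second)
        return 1; // duplicate offset: sortPtrAccesses bails out here
      // The input is already in sorted order iff every insertion lands at
      // the end of the set.
      IsConsecutive = IsConsecutive && std::next(Res.first) == Sorted.end();
    }
    std::printf("consecutive: %d\n", IsConsecutive); // prints 0 for this input
    return 0;
  }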
> -/// Take the address space operand from the Load/Store instruction.
> -/// Returns -1 if this is not a valid Load/Store instruction.
> -static unsigned getAddressSpaceOperand(Value *I) {
> -  if (LoadInst *L = dyn_cast<LoadInst>(I))
> -    return L->getPointerAddressSpace();
> -  if (StoreInst *S = dyn_cast<StoreInst>(I))
> -    return S->getPointerAddressSpace();
> -  return -1;
> -}
> -
>  /// Returns true if the memory operations \p A and \p B are consecutive.
>  bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
>                                 ScalarEvolution &SE, bool CheckType) {
>    Value *PtrA = getLoadStorePointerOperand(A);
>    Value *PtrB = getLoadStorePointerOperand(B);
> -  unsigned ASA = getAddressSpaceOperand(A);
> -  unsigned ASB = getAddressSpaceOperand(B);
> -
> -  // Check that the address spaces match and that the pointers are valid.
> -  if (!PtrA || !PtrB || (ASA != ASB))
> -    return false;
> -
> -  // Make sure that A and B are different pointers.
> -  if (PtrA == PtrB)
> -    return false;
> -
> -  // Make sure that A and B have the same type if required.
> -  if (CheckType && PtrA->getType() != PtrB->getType())
> +  if (!PtrA || !PtrB)
>      return false;
> -
> -  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
> -  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
> -
> -  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
> -  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
> -  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
> -
> -  // Retrieve the address space again as pointer stripping now tracks through
> -  // `addrspacecast`.
> -  ASA = cast<PointerType>(PtrA->getType())->getAddressSpace();
> -  ASB = cast<PointerType>(PtrB->getType())->getAddressSpace();
> -  // Check that the address spaces match and that the pointers are valid.
> -  if (ASA != ASB)
> -    return false;
> -
> -  IdxWidth = DL.getIndexSizeInBits(ASA);
> -  OffsetA = OffsetA.sextOrTrunc(IdxWidth);
> -  OffsetB = OffsetB.sextOrTrunc(IdxWidth);
> -
> -  APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
> -
> -  // OffsetDelta = OffsetB - OffsetA;
> -  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
> -  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
> -  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
> -  const APInt &OffsetDelta = cast<SCEVConstant>(OffsetDeltaSCEV)->getAPInt();
> -
> -  // Check if they are based on the same pointer. That makes the offsets
> -  // sufficient.
> -  if (PtrA == PtrB)
> -    return OffsetDelta == Size;
> -
> -  // Compute the necessary base pointer delta to have the necessary final delta
> -  // equal to the size.
> -  // BaseDelta = Size - OffsetDelta;
> -  const SCEV *SizeSCEV = SE.getConstant(Size);
> -  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
> -
> -  // Otherwise compute the distance with SCEV between the base pointers.
> -  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
> -  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
> -  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
> -  return X == PtrSCEVB;
> +  Optional<int> Diff =
> +      getPointersDiff(PtrA, PtrB, DL, SE, /*StrictCheck=*/true, CheckType);
> +  return Diff && *Diff == 1;
>  }
>
> MemoryDepChecker::VectorizationSafetyStatus
>
> diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
> index 385b6f30dc0f..78d2ea0032db 100644
> --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
> +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
> @@ -941,10 +941,16 @@ class BoUpSLP {
>                             ScalarEvolution &SE) {
>      auto *LI1 = dyn_cast<LoadInst>(V1);
>      auto *LI2 = dyn_cast<LoadInst>(V2);
> -    if (LI1 && LI2)
> -      return isConsecutiveAccess(LI1, LI2, DL, SE)
> -                 ? VLOperands::ScoreConsecutiveLoads
> -                 : VLOperands::ScoreFail;
> +    if (LI1 && LI2) {
> +      if (LI1->getParent() != LI2->getParent())
> +        return VLOperands::ScoreFail;
> +
> +      Optional<int> Dist =
> +          getPointersDiff(LI1->getPointerOperand(), LI2->getPointerOperand(),
> +                          DL, SE, /*StrictCheck=*/true);
> +      return (Dist && *Dist == 1) ? VLOperands::ScoreConsecutiveLoads
> +                                  : VLOperands::ScoreFail;
> +    }
>
>      auto *C1 = dyn_cast<Constant>(V1);
>      auto *C2 = dyn_cast<Constant>(V2);
> @@ -2871,13 +2877,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
>            Ptr0 = PointerOps[CurrentOrder.front()];
>            PtrN = PointerOps[CurrentOrder.back()];
>          }
> -        const SCEV *Scev0 = SE->getSCEV(Ptr0);
> -        const SCEV *ScevN = SE->getSCEV(PtrN);
> -        const auto *Diff =
> -            dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));
> -        uint64_t Size = DL->getTypeAllocSize(ScalarTy);
> +        Optional<int> Diff = getPointersDiff(Ptr0, PtrN, *DL, *SE);
>          // Check that the sorted loads are consecutive.
> -        if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) {
> +        if (static_cast<unsigned>(*Diff) == VL.size() - 1) {
>            if (CurrentOrder.empty()) {
>              // Original loads are consecutive and does not require reordering.
>              ++NumOpsWantToKeepOriginalOrder;
> @@ -3150,13 +3152,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
>            Ptr0 = PointerOps[CurrentOrder.front()];
>            PtrN = PointerOps[CurrentOrder.back()];
>          }
> -        const SCEV *Scev0 = SE->getSCEV(Ptr0);
> -        const SCEV *ScevN = SE->getSCEV(PtrN);
> -        const auto *Diff =
> -            dyn_cast<SCEVConstant>(SE->getMinusSCEV(ScevN, Scev0));
> -        uint64_t Size = DL->getTypeAllocSize(ScalarTy);
> +        Optional<int> Dist = getPointersDiff(Ptr0, PtrN, *DL, *SE);
>          // Check that the sorted pointer operands are consecutive.
> -        if (Diff && Diff->getAPInt() == (VL.size() - 1) * Size) {
> +        if (static_cast<unsigned>(*Dist) == VL.size() - 1) {
>            if (CurrentOrder.empty()) {
>              // Original stores are consecutive and does not require reordering.
>              ++NumOpsWantToKeepOriginalOrder;
> @@ -6107,20 +6105,41 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
>
>    int E = Stores.size();
>    SmallBitVector Tails(E, false);
> -  SmallVector<int, 16> ConsecutiveChain(E, E + 1);
>    int MaxIter = MaxStoreLookup.getValue();
> +  SmallVector<std::pair<int, int>, 16> ConsecutiveChain(
> +      E, std::make_pair(E, INT_MAX));
> +  SmallVector<SmallBitVector, 4> CheckedPairs(E, SmallBitVector(E, false));
>    int IterCnt;
>    auto &&FindConsecutiveAccess = [this, &Stores, &Tails, &IterCnt, MaxIter,
> +                                  &CheckedPairs,
>                                    &ConsecutiveChain](int K, int Idx) {
>      if (IterCnt >= MaxIter)
>        return true;
> +    if (CheckedPairs[Idx].test(K))
> +      return ConsecutiveChain[K].second == 1 &&
> +             ConsecutiveChain[K].first == Idx;
>      ++IterCnt;
> -    if (!isConsecutiveAccess(Stores[K], Stores[Idx], *DL, *SE))
> +    CheckedPairs[Idx].set(K);
> +    CheckedPairs[K].set(Idx);
> +    Optional<int> Diff = getPointersDiff(Stores[K]->getPointerOperand(),
> +                                         Stores[Idx]->getPointerOperand(), *DL,
> +                                         *SE, /*StrictCheck=*/true);
> +    if (!Diff || *Diff == 0)
> +      return false;
> +    int Val = *Diff;
> +    if (Val < 0) {
> +      if (ConsecutiveChain[Idx].second > -Val) {
> +        Tails.set(K);
> +        ConsecutiveChain[Idx] = std::make_pair(K, -Val);
> +      }
> +      return false;
> +    }
> +    if (ConsecutiveChain[K].second <= Val)
>        return false;
>
>      Tails.set(Idx);
> -    ConsecutiveChain[K] = Idx;
> -    return true;
> +    ConsecutiveChain[K] = std::make_pair(Idx, Val);
> +    return Val == 1;
>    };
>    // Do a quadratic search on all of the given stores in reverse order and find
>    // all of the pairs of stores that follow each other.
> @@ -6140,17 +6159,31 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
>    // For stores that start but don't end a link in the chain:
>    for (int Cnt = E; Cnt > 0; --Cnt) {
>      int I = Cnt - 1;
> -    if (ConsecutiveChain[I] == E + 1 || Tails.test(I))
> +    if (ConsecutiveChain[I].first == E || Tails.test(I))
>        continue;
>      // We found a store instr that starts a chain. Now follow the chain and try
>      // to vectorize it.
>      BoUpSLP::ValueList Operands;
>      // Collect the chain into a list.
> -    while (I != E + 1 && !VectorizedStores.count(Stores[I])) {
> +    while (I != E && !VectorizedStores.count(Stores[I])) {
>        Operands.push_back(Stores[I]);
> +      Tails.set(I);
> +      if (ConsecutiveChain[I].second != 1) {
> +        // Mark the new end in the chain and go back, if required. It might be
> +        // required if the original stores come in reversed order, for example.
> +        if (ConsecutiveChain[I].first != E &&
> +            Tails.test(ConsecutiveChain[I].first) &&
> +            !VectorizedStores.count(Stores[ConsecutiveChain[I].first])) {
> +          Tails.reset(ConsecutiveChain[I].first);
> +          if (Cnt < ConsecutiveChain[I].first + 2)
> +            Cnt = ConsecutiveChain[I].first + 2;
> +        }
> +        break;
> +      }
>        // Move to the next value in the chain.
> -      I = ConsecutiveChain[I];
> +      I = ConsecutiveChain[I].first;
>      }
> +    assert(!Operands.empty() && "Expected non-empty list of stores.");
>
>      unsigned MaxVecRegSize = R.getMaxVecRegSize();
>      unsigned EltSize = R.getVectorElementSize(Operands[0]);
>
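The new chain bookkeeping above is easier to follow in isolation: for each
store, the lambda records the nearest store that follows it in memory as an
{index, distance} pair, and a distance of 1 links the chain. A standalone
sketch with made-up offsets (illustrative only, not code from the patch):

  #include <climits>
  #include <cstdio>
  #include <utility>
  #include <vector>

  int main() {
    // Hypothetical element offsets of four stores, in program order.
    std::vector<int> Off = {0, 2, 1, 3};
    int E = static_cast<int>(Off.size());
    // {next store index, element distance}; {E, INT_MAX} means "no neighbor".
    std::vector<std::pair<int, int>> Chain(E, {E, INT_MAX});
    for (int K = 0; K < E; ++K)
      for (int I = 0; I < E; ++I) {
        int D = Off[I] - Off[K];
        if (I != K && D > 0 && D < Chain[K].second)
          Chain[K] = {I, D}; // keep the nearest forward neighbor
      }
    for (int K = 0; K < E; ++K)
      std::printf("store %d -> store %d (dist %d)\n", K, Chain[K].first,
                  Chain[K].second);
    return 0;
  }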
> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
> index 267cf1a02c29..e28362894910 100644
> --- a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
> +++ b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
> @@ -1,7 +1,7 @@
>  ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
> -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+sse2 -S | FileCheck %s --check-prefix=SSE
> -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+avx -S | FileCheck %s --check-prefix=AVX
> -; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -slp-vectorizer -mattr=+avx2 -S | FileCheck %s --check-prefix=AVX
> +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+sse2 -S | FileCheck %s --check-prefix=SSE
> +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+avx -S | FileCheck %s --check-prefix=AVX
> +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -slp-vectorizer -mattr=+avx2 -S | FileCheck %s --check-prefix=AVX
>
> %class.1 = type { %class.2 }
> %class.2 = type { %"class.3" }
> @@ -117,13 +117,10 @@ define void @pr35497() local_unnamed_addr #0 {
>  ; AVX-NEXT:    [[ARRAYIDX2_6:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 0
>  ; AVX-NEXT:    [[TMP10:%.*]] = bitcast i64* [[ARRAYIDX2_6]] to <2 x i64>*
>  ; AVX-NEXT:    store <2 x i64> [[TMP4]], <2 x i64>* [[TMP10]], align 1
> -; AVX-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
> -; AVX-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP11]], i32 0
> -; AVX-NEXT:    [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP5]], i32 1
> -; AVX-NEXT:    [[TMP14:%.*]] = lshr <2 x i64> [[TMP13]], <i64 6, i64 6>
> -; AVX-NEXT:    [[TMP15:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP14]]
> -; AVX-NEXT:    [[TMP16:%.*]] = bitcast i64* [[ARRAYIDX2_2]] to <2 x i64>*
> -; AVX-NEXT:    store <2 x i64> [[TMP15]], <2 x i64>* [[TMP16]], align 1
> +; AVX-NEXT:    [[TMP11:%.*]] = lshr <2 x i64> [[TMP4]], <i64 6, i64 6>
> +; AVX-NEXT:    [[TMP12:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP11]]
> +; AVX-NEXT:    [[TMP13:%.*]] = bitcast i64* [[ARRAYIDX2_2]] to <2 x i64>*
> +; AVX-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* [[TMP13]], align 1
>  ; AVX-NEXT:    ret void
>  ;
>  entry:
>
>
>