[llvm] r273864 - Fixed consecutive memory access detection in Loop Vectorizer.

Eric Liu via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 28 07:36:19 PDT 2016


Hi Elena,

I see this is a re-commit of a previous patch but wondering if new changes
you introduced in this re-commit have been reviewed or not?

One of our internal test cases fails when built with llvm that is sync to
this revision (the test passes with both r273863 llvm and gcc). I am trying
to find a minimal test case that can reproduce the bug, but please could
you take a second look at the changes? Thanks a lot!

Cheers,
Eric

On Mon, Jun 27, 2016 at 1:26 PM Elena Demikhovsky via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: delena
> Date: Mon Jun 27 06:19:23 2016
> New Revision: 273864
>
> URL: http://llvm.org/viewvc/llvm-project?rev=273864&view=rev
> Log:
> Fixed consecutive memory access detection in Loop Vectorizer.
> It did not handle correctly cases without GEP.
>
> The following loop wasn't vectorized:
>
> for (int i=0; i<len; i++)
>
>   *to++ = *from++;
>
> I use getPtrStride() to find Stride for memory access and return 0 is the
> Stride is not 1 or -1.
>
> Re-commit rL273257 - revision: http://reviews.llvm.org/D20789
>
>
> Added:
>     llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep.ll
>       - copied unchanged from r273257,
> llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep.ll
>     llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep2.ll
> Modified:
>     llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
>     llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
>     llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>     llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll
>
> Modified: llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h?rev=273864&r1=273863&r2=273864&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h (original)
> +++ llvm/trunk/include/llvm/Analysis/LoopAccessAnalysis.h Mon Jun 27
> 06:19:23 2016
> @@ -679,9 +679,11 @@ const SCEV *replaceSymbolicStrideSCEV(Pr
>  /// to \p PtrToStride and therefore add further predicates to \p PSE.
>  /// The \p Assume parameter indicates if we are allowed to make additional
>  /// run-time assumptions.
> +/// The \p ShouldCheckWrap indicates that we should ensure that address
> +/// calculation does not wrap.
>  int getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop
> *Lp,
>                   const ValueToValueMap &StridesMap = ValueToValueMap(),
> -                 bool Assume = false);
> +                 bool Assume = false, bool ShouldCheckWrap = true);
>
>  /// \brief Returns true if the memory operations \p A and \p B are
> consecutive.
>  /// This is a simple API that does not depend on the analysis pass.
>
> Modified: llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp?rev=273864&r1=273863&r2=273864&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp (original)
> +++ llvm/trunk/lib/Analysis/LoopAccessAnalysis.cpp Mon Jun 27 06:19:23 2016
> @@ -866,7 +866,7 @@ static bool isNoWrapAddRec(Value *Ptr, c
>  /// \brief Check whether the access through \p Ptr has a constant stride.
>  int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
>                         const Loop *Lp, const ValueToValueMap &StridesMap,
> -                       bool Assume) {
> +                       bool Assume, bool ShouldCheckWrap) {
>    Type *Ty = Ptr->getType();
>    assert(Ty->isPointerTy() && "Unexpected non-ptr");
>
> @@ -905,9 +905,9 @@ int llvm::getPtrStride(PredicatedScalarE
>    // to access the pointer value "0" which is undefined behavior in
> address
>    // space 0, therefore we can also vectorize this case.
>    bool IsInBoundsGEP = isInBoundsGep(Ptr);
> -  bool IsNoWrapAddRec =
> -      PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
> -      isNoWrapAddRec(Ptr, AR, PSE, Lp);
> +  bool IsNoWrapAddRec = !ShouldCheckWrap ||
> +    PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
> +    isNoWrapAddRec(Ptr, AR, PSE, Lp);
>    bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
>    if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
>      if (Assume) {
>
> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=273864&r1=273863&r2=273864&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Mon Jun 27
> 06:19:23 2016
> @@ -2242,87 +2242,13 @@ Value *InnerLoopVectorizer::getStepVecto
>  }
>
>  int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
> -  assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
> -  auto *SE = PSE.getSE();
> -  // Make sure that the pointer does not point to structs.
> -  if (Ptr->getType()->getPointerElementType()->isAggregateType())
> -    return 0;
> -
> -  // If this value is a pointer induction variable, we know it is
> consecutive.
> -  PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
> -  if (Phi && Inductions.count(Phi)) {
> -    InductionDescriptor II = Inductions[Phi];
> -    return II.getConsecutiveDirection();
> -  }
> -
> -  GetElementPtrInst *Gep = getGEPInstruction(Ptr);
> -  if (!Gep)
> -    return 0;
> -
> -  unsigned NumOperands = Gep->getNumOperands();
> -  Value *GpPtr = Gep->getPointerOperand();
> -  // If this GEP value is a consecutive pointer induction variable and
> all of
> -  // the indices are constant, then we know it is consecutive.
> -  Phi = dyn_cast<PHINode>(GpPtr);
> -  if (Phi && Inductions.count(Phi)) {
> -
> -    // Make sure that the pointer does not point to structs.
> -    PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
> -    if (GepPtrType->getElementType()->isAggregateType())
> -      return 0;
> -
> -    // Make sure that all of the index operands are loop invariant.
> -    for (unsigned i = 1; i < NumOperands; ++i)
> -      if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
> -        return 0;
> -
> -    InductionDescriptor II = Inductions[Phi];
> -    return II.getConsecutiveDirection();
> -  }
>
> -  unsigned InductionOperand = getGEPInductionOperand(Gep);
> -
> -  // Check that all of the gep indices are uniform except for our
> induction
> -  // operand.
> -  for (unsigned i = 0; i != NumOperands; ++i)
> -    if (i != InductionOperand &&
> -        !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
> -      return 0;
> -
> -  // We can emit wide load/stores only if the last non-zero index is the
> -  // induction variable.
> -  const SCEV *Last = nullptr;
> -  if (!getSymbolicStrides() || !getSymbolicStrides()->count(Gep))
> -    Last = PSE.getSCEV(Gep->getOperand(InductionOperand));
> -  else {
> -    // Because of the multiplication by a stride we can have a s/zext
> cast.
> -    // We are going to replace this stride by 1 so the cast is safe to
> ignore.
> -    //
> -    //  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next,
> %for.body ]
> -    //  %0 = trunc i64 %indvars.iv to i32
> -    //  %mul = mul i32 %0, %Stride1
> -    //  %idxprom = zext i32 %mul to i64  << Safe cast.
> -    //  %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
> -    //
> -    Last = replaceSymbolicStrideSCEV(PSE, *getSymbolicStrides(),
> -                                     Gep->getOperand(InductionOperand),
> Gep);
> -    if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
> -      Last =
> -          (C->getSCEVType() == scSignExtend || C->getSCEVType() ==
> scZeroExtend)
> -              ? C->getOperand()
> -              : Last;
> -  }
> -  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
> -    const SCEV *Step = AR->getStepRecurrence(*SE);
> -
> -    // The memory is consecutive because the last index is consecutive
> -    // and all other indices are loop invariant.
> -    if (Step->isOne())
> -      return 1;
> -    if (Step->isAllOnesValue())
> -      return -1;
> -  }
> +  const ValueToValueMap &Strides = getSymbolicStrides() ?
> *getSymbolicStrides() :
> +    ValueToValueMap();
>
> +  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
> +  if (Stride == 1 || Stride == -1)
> +    return Stride;
>    return 0;
>  }
>
> @@ -2658,7 +2584,9 @@ void InnerLoopVectorizer::vectorizeMemor
>    // Handle consecutive loads/stores.
>    GetElementPtrInst *Gep = getGEPInstruction(Ptr);
>    if (ConsecutiveStride) {
> -    if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
> +    if (Gep &&
> +
> !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()),
> +                                      OrigLoop)) {
>        setDebugLocFromInst(Builder, Gep);
>        Value *PtrOperand = Gep->getPointerOperand();
>        Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
> @@ -2671,9 +2599,6 @@ void InnerLoopVectorizer::vectorizeMemor
>        Ptr = Builder.Insert(Gep2);
>      } else if (Gep) {
>        setDebugLocFromInst(Builder, Gep);
> -
> assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()),
> -                                          OrigLoop) &&
> -             "Base ptr must be invariant");
>        // The last index does not have to be the induction. It can be
>        // consecutive and be a function of the index. For example A[I+1];
>        unsigned NumOperands = Gep->getNumOperands();
> @@ -2702,8 +2627,6 @@ void InnerLoopVectorizer::vectorizeMemor
>        }
>        Ptr = Builder.Insert(Gep2);
>      } else { // No GEP
> -      // Use the induction element ptr.
> -      assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
>        setDebugLocFromInst(Builder, Ptr);
>        VectorParts &PtrVal = getVectorValue(Ptr);
>        Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
>
> Added: llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep2.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep2.ll?rev=273864&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep2.ll (added)
> +++ llvm/trunk/test/Transforms/LoopVectorize/consec_no_gep2.ll Mon Jun 27
> 06:19:23 2016
> @@ -0,0 +1,34 @@
> +; RUN: opt < %s -loop-vectorize -S | FileCheck %s
> +target datalayout = "E-m:e-i64:64-n32:64"
> +target triple = "powerpc64-unknown-linux-gnu"
> +
> +; CHECK-LABEL: @img2buf
> +; CHECK: store <4 x i32>
> +; Function Attrs: nounwind
> +define void @img2buf(i64 %val, i8* nocapture %buf, i32 %N)
> local_unnamed_addr #0 {
> +entry:
> +  br label %l2
> +
> +l2:
> +  br label %for.body57.us
> +
> +for.body57.us:
> +  %indvars.iv24 = phi i64 [ %val, %l2 ], [ %indvars.iv.next25, %
> for.body57.us ]
> +  %0 = trunc i64 %indvars.iv24 to i32
> +  %add77.us = add i32 5, %0
> +  %mul78.us = shl nsw i32 %add77.us, 2
> +  %idx.ext79.us = sext i32 %mul78.us to i64
> +  %add.ptr80.us = getelementptr inbounds i8, i8* %buf, i64 %idx.ext79.us
> +  %ui32.0.add.ptr80.sroa_cast.us = bitcast i8* %add.ptr80.us to i32*
> +  store i32 0, i32* %ui32.0.add.ptr80.sroa_cast.us, align 1
> +  %indvars.iv.next25 = add nsw i64 %indvars.iv24, 1
> +  %lftr.wideiv26 = trunc i64 %indvars.iv.next25 to i32
> +  %exitcond27 = icmp eq i32 %lftr.wideiv26, %N
> +  br i1 %exitcond27, label %l3, label %for.body57.us
> +
> +l3:
> +  ret void
> +}
> +
> +attributes #0 = { nounwind "disable-tail-calls"="false"
> "less-precise-fpmad"="false" "no-frame-pointer-elim"="false"
> "no-infs-fp-math"="false" "no-jump-tables"="false"
> "no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
> "target-cpu"="ppc64"
> "target-features"="+altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-qpx,-vsx"
> "unsafe-fp-math"="false" "use-soft-float"="false" }
> +
>
> Modified: llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll?rev=273864&r1=273863&r2=273864&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll (original)
> +++ llvm/trunk/test/Transforms/LoopVectorize/ptr-induction.ll Mon Jun 27
> 06:19:23 2016
> @@ -18,6 +18,7 @@ while.body.preheader:
>  while.body:                                       ; preds =
> %while.body.preheader, %while.body
>    %a.pn = phi i32* [ %incdec.ptr8, %while.body ], [ %a,
> %while.body.preheader ]
>    %acc.07 = phi i32 [ %add, %while.body ], [ 0, %while.body.preheader ]
> +  %a1.pn = getelementptr inbounds i32, i32* %a.pn, i64 0
>    %incdec.ptr8 = getelementptr inbounds i32, i32* %a.pn, i64 1
>    %0 = load i32, i32* %incdec.ptr8, align 1
>    %add = add nuw nsw i32 %0, %acc.07
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160628/04ffcf53/attachment.html>


More information about the llvm-commits mailing list