[llvm] r281732 - Reapplying r278731 after fixing the problem that caused it to be reverted.

Fri Sep 16 15:57:58 PDT 2016

On Fri, Sep 16, 2016 at 7:38 AM, David L Kreitzer via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: dlkreitz
> Date: Fri Sep 16 09:38:13 2016
> New Revision: 281732
>
> URL: http://llvm.org/viewvc/llvm-project?rev=281732&view=rev
> Log:
> Reapplying r278731 after fixing the problem that caused it to be reverted.
>
> Enhance SCEV to compute the trip count for some loops with unknown stride.
>
> Patch by Pankaj Chawla
>
> Differential Revision: https://reviews.llvm.org/D22377
>
> Added:
>     llvm/trunk/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
> Modified:
>     llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
>     llvm/trunk/lib/Analysis/ScalarEvolution.cpp
>
> Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/
> llvm/Analysis/ScalarEvolution.h?rev=281732&r1=281731&r2=281732&view=diff
> ============================================================
> ==================
> --- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h (original)
> +++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h Fri Sep 16
> 09:38:13 2016
> @@ -803,6 +803,13 @@ namespace llvm {
>      /// Cache for \c loopHasNoAbnormalExits.
>      DenseMap<const Loop *, bool> LoopHasNoAbnormalExits;
>
> +    /// Cache for \c loopHasNoSideEffects.
> +    DenseMap<const Loop *, bool> LoopHasNoSideEffects;
> +
> +    /// Returns true if \p L contains no instruction that can have side
> effects
> +    /// (i.e. via throwing an exception, volatile or atomic access).
> +    bool loopHasNoSideEffects(const Loop *L);
> +
>      /// Returns true if \p L contains no instruction that can abnormally
> exit
>      /// the loop (i.e. via throwing an exception, by terminating the
> thread
>      /// cleanly or by infinite looping in a called function).  Strictly
>
> Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/
> Analysis/ScalarEvolution.cpp?rev=281732&r1=281731&r2=281732&view=diff
> ============================================================
> ==================
> --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original)
> +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Fri Sep 16 09:38:13 2016
> @@ -4953,6 +4953,28 @@ bool ScalarEvolution::isAddRecNeverPoiso
>    return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);
>  }
>
> +bool ScalarEvolution::loopHasNoSideEffects(const Loop *L) {
> +  auto Itr = LoopHasNoSideEffects.find(L);
> +  if (Itr == LoopHasNoSideEffects.end()) {
> +    auto NoSideEffectsInBB = [&](BasicBlock *BB) {
> +      return all_of(*BB, [](Instruction &I) {
> +        // Non-atomic, non-volatile stores are ok.
> +        if (auto *SI = dyn_cast<StoreInst>(&I))
> +          return SI->isSimple();
>

A loop which stores to a global variable has no side effects?

> +
> +        return !I.mayHaveSideEffects();
> +      });
> +    };
> +
> +    auto InsertPair = LoopHasNoSideEffects.insert(
> +        {L, all_of(L->getBlocks(), NoSideEffectsInBB)});
> +    assert(InsertPair.second && "We just checked!");
> +    Itr = InsertPair.first;
> +  }
> +
> +  return Itr->second;
> +}
> +
>  bool ScalarEvolution::loopHasNoAbnormalExits(const Loop *L) {
>    auto Itr = LoopHasNoAbnormalExits.find(L);
>    if (Itr == LoopHasNoAbnormalExits.end()) {
> @@ -5540,6 +5562,7 @@ void ScalarEvolution::forgetLoop(const L
>      forgetLoop(I);
>
>    LoopHasNoAbnormalExits.erase(L);
> +  LoopHasNoSideEffects.erase(L);
>  }
>
>  void ScalarEvolution::forgetValue(Value *V) {
> @@ -8614,6 +8637,8 @@ bool ScalarEvolution::isImpliedCondOpera
>
>  bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV
> *Stride,
>                                           bool IsSigned, bool NoWrap) {
> +  assert(isKnownPositive(Stride) && "Positive stride expected!");
> +
>    if (NoWrap) return false;
>
>    unsigned BitWidth = getTypeSizeInBits(RHS->getType());
> @@ -8682,11 +8707,15 @@ ScalarEvolution::howManyLessThans(const
>      return getCouldNotCompute();
>
>    const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
> -  if (!IV && AllowPredicates)
> +  bool PredicatedIV = false;
> +
> +  if (!IV && AllowPredicates) {
>      // Try to make this an AddRec using runtime tests, in the first X
>      // iterations of this loop, where X is the SCEV expression found by
> the
>      // algorithm below.
>      IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
> +    PredicatedIV = true;
> +  }
>
>    // Avoid weird loops
>    if (!IV || IV->getLoop() != L || !IV->isAffine())
> @@ -8697,15 +8726,62 @@ ScalarEvolution::howManyLessThans(const
>
>    const SCEV *Stride = IV->getStepRecurrence(*this);
>
> -  // Avoid negative or zero stride values
> -  if (!isKnownPositive(Stride))
> -    return getCouldNotCompute();
> +  bool PositiveStride = isKnownPositive(Stride);
>
> -  // Avoid proven overflow cases: this will ensure that the backedge
> taken count
> -  // will not generate any unsigned overflow. Relaxed no-overflow
> conditions
> -  // exploit NoWrapFlags, allowing to optimize in presence of undefined
> -  // behaviors like the case of C language.
> -  if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned,
> NoWrap))
> +  // Avoid negative or zero stride values.
> +  if (!PositiveStride) {
> +    // We can compute the correct backedge taken count for loops with
> unknown
> +    // strides if we can prove that the loop is not an infinite loop with
> side
> +    // effects. Here's the loop structure we are trying to handle -
> +    //
> +    // i = start
> +    // do {
> +    //   A[i] = i;
> +    //   i += s;
> +    // } while (i < end);
> +    //
> +    // The backedge taken count for such loops is evaluated as -
> +    // (max(end, start + stride) - start - 1) /u stride
> +    //
> +    // The additional preconditions that we need to check to prove
> correctness
> +    // of the above formula is as follows -
> +    //
> +    // a) IV is either nuw or nsw depending upon signedness (indicated by
> the
> +    //    NoWrap flag).
> +    // b) loop is single exit with no side effects.
> +    //
> +    //
> +    // Precondition a) implies that if the stride is negative, this is a
> single
> +    // trip loop. The backedge taken count formula reduces to zero in
> this case.
> +    //
> +    // Precondition b) implies that the unknown stride cannot be zero
> otherwise
> +    // we have UB.
> +    //
> +    // The positive stride case is the same as isKnownPositive(Stride)
> returning
> +    // true (original behavior of the function).
> +    //
> +    // We want to make sure that the stride is truly unknown as there are
> edge
> +    // cases where ScalarEvolution propagates no wrap flags to the
> +    // post-increment/decrement IV even though the increment/decrement
> operation
> +    // itself is wrapping. The computed backedge taken count may be wrong
> in
> +    // such cases. This is prevented by checking that the stride is not
> known to
> +    // be either positive or non-positive. For example, no wrap flags are
> +    // propagated to the post-increment IV of this loop with a trip count
> of 2 -
> +    //
> +    // unsigned char i;
> +    // for(i=127; i<128; i+=129)
> +    //   A[i] = i;
> +    //
> +    if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||
> +        !loopHasNoSideEffects(L))
> +      return getCouldNotCompute();
> +
> +  } else if (!Stride->isOne() &&
> +             doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
> +    // Avoid proven overflow cases: this will ensure that the backedge
> taken
> +    // count will not generate any unsigned overflow. Relaxed no-overflow
> +    // conditions exploit NoWrapFlags, allowing to optimize in presence of
> +    // undefined behaviors like the case of C language.
>      return getCouldNotCompute();
>
>    ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
> @@ -8720,12 +8796,21 @@ ScalarEvolution::howManyLessThans(const
>    APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
>                              : getUnsignedRange(Start).getUnsignedMin();
>
> -  APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
> -                             : getUnsignedRange(Stride).getUnsignedMin();
> -
>    unsigned BitWidth = getTypeSizeInBits(LHS->getType());
> -  APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) -
> (MinStride - 1)
> -                         : APInt::getMaxValue(BitWidth) - (MinStride - 1);
> +
> +  APInt StrideForMaxBECount;
> +
> +  if (PositiveStride)
> +    StrideForMaxBECount = IsSigned ? getSignedRange(Stride).
> getSignedMin()
> +                                   : getUnsignedRange(Stride).
> getUnsignedMin();
> +  else
> +    // Using a stride of 1 is safe when computing max backedge taken
> count for
> +    // a loop with unknown stride.
> +    StrideForMaxBECount = APInt(BitWidth, 1, IsSigned);
> +
> +  APInt Limit =
> +      IsSigned ? APInt::getSignedMaxValue(BitWidth) -
> (StrideForMaxBECount - 1)
> +               : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1);
>
>    // Although End can be a MAX expression we estimate MaxEnd considering
> only
>    // the case End = RHS. This is safe because in the other case (End -
> Start)
> @@ -8739,7 +8824,7 @@ ScalarEvolution::howManyLessThans(const
>      MaxBECount = BECount;
>    else
>      MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
> -                                getConstant(MinStride), false);
> +                                getConstant(StrideForMaxBECount), false);
>
>    if (isa<SCEVCouldNotCompute>(MaxBECount))
>      MaxBECount = BECount;
>
> Added: llvm/trunk/test/Analysis/ScalarEvolution/trip-count-
> unknown-stride.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/
> Analysis/ScalarEvolution/trip-count-unknown-stride.ll?rev=281732&view=auto
> ============================================================
> ==================
> --- llvm/trunk/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
> (added)
> +++ llvm/trunk/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
> Fri Sep 16 09:38:13 2016
> @@ -0,0 +1,62 @@
> +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
> +
> +; ScalarEvolution should be able to compute trip count of the loop by
> proving
> +; that this is not an infinite loop with side effects.
> +
> +; CHECK: Determining loop execution counts for: @foo1
> +; CHECK: backedge-taken count is ((-1 + %n) /u %s)
> +
> +; We should have a conservative estimate for the max backedge taken count
> for
> +; loops with unknown stride.
> +; CHECK: max backedge-taken count is -1
> +
> +target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
> +
> +; Function Attrs: norecurse nounwind
> +define void @foo1(i32* nocapture %A, i32 %n, i32 %s) #0 {
> +entry:
> +  %cmp4 = icmp sgt i32 %n, 0
> +  br i1 %cmp4, label %for.body, label %for.end
> +
> +for.body:                                         ; preds = %entry,
> %for.body
> +  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
> +  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05
> +  %0 = load i32, i32* %arrayidx, align 4
> +  %inc = add nsw i32 %0, 1
> +  store i32 %inc, i32* %arrayidx, align 4
> +  %add = add nsw i32 %i.05, %s
> +  %cmp = icmp slt i32 %add, %n
> +  br i1 %cmp, label %for.body, label %for.end
> +
> +for.end:                                          ; preds = %for.body,
> %entry
> +  ret void
> +}
> +
> +
> +; Check that we are able to compute trip count of a loop without an entry
> guard.
> +; CHECK: Determining loop execution counts for: @foo2
> +; CHECK: backedge-taken count is ((-1 + (%n smax %s)) /u %s)
> +
> +; We should have a conservative estimate for the max backedge taken count
> for
> +; loops with unknown stride.
> +; CHECK: max backedge-taken count is -1
> +
> +; Function Attrs: norecurse nounwind
> +define void @foo2(i32* nocapture %A, i32 %n, i32 %s) #0 {
> +entry:
> +  br label %for.body
> +
> +for.body:                                         ; preds = %entry,
> %for.body
> +  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
> +  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05
> +  %0 = load i32, i32* %arrayidx, align 4
> +  %inc = add nsw i32 %0, 1
> +  store i32 %inc, i32* %arrayidx, align 4
> +  %add = add nsw i32 %i.05, %s
> +  %cmp = icmp slt i32 %add, %n
> +  br i1 %cmp, label %for.body, label %for.end
> +
> +for.end:                                          ; preds = %for.body,
> %entry
> +  ret void
> +}
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160916/9eaa181d/attachment.html>