[llvm] r281732 - Reapplying r278731 after fixing the problem that caused it to be reverted.

Chawla, Pankaj via llvm-commits llvm-commits at lists.llvm.org
Sun Sep 18 19:17:55 PDT 2016


Hi David,

>> A loop which stores to a global variable has no side effects?

This seems to be an oversight.

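For illustration, here is a loop (hypothetical C++, not taken from the patch) whose only effect is a simple store to a global; loopHasNoSideEffects() would currently report it as side-effect free even though the stores are observable:

  int g;                        // global; stores to it are observable behavior
  void store_to_global(int n, int s) {
    int i = 0;
    do {
      g = i;                    // non-atomic, non-volatile store -> currently "no side effect"
      i += s;                   // if s == 0 the loop never terminates, yet it keeps storing
    } while (i < n);
  }
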
How about we check that the pointer operand of the store is not a GlobalValue?
Does this seem reasonable?
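
A rough sketch of that check inside the NoSideEffectsInBB() lambda (just a sketch, untested; stripPointerCasts() is only a first approximation of "does not point at a global"):

  // Non-atomic, non-volatile stores are ok, unless they may write to a global.
  if (auto *SI = dyn_cast<StoreInst>(&I)) {
    const Value *Ptr = SI->getPointerOperand()->stripPointerCasts();
    return SI->isSimple() && !isa<GlobalValue>(Ptr);
  }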

Thanks,
Pankaj

From: David Majnemer [mailto:david.majnemer at gmail.com]
Sent: Friday, September 16, 2016 6:58 PM
To: Kreitzer, David L <david.l.kreitzer at intel.com>
Cc: llvm-commits <llvm-commits at lists.llvm.org>
Subject: Re: [llvm] r281732 - Reapplying r278731 after fixing the problem that caused it to be reverted.



On Fri, Sep 16, 2016 at 7:38 AM, David L Kreitzer via llvm-commits <llvm-commits at lists.llvm.org> wrote:
Author: dlkreitz
Date: Fri Sep 16 09:38:13 2016
New Revision: 281732

URL: http://llvm.org/viewvc/llvm-project?rev=281732&view=rev
Log:
Reapplying r278731 after fixing the problem that caused it to be reverted.

Enhance SCEV to compute the trip count for some loops with unknown stride.

Patch by Pankaj Chawla

Differential Revision: https://reviews.llvm.org/D22377

Added:
    llvm/trunk/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
Modified:
    llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
    llvm/trunk/lib/Analysis/ScalarEvolution.cpp

Modified: llvm/trunk/include/llvm/Analysis/ScalarEvolution.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ScalarEvolution.h?rev=281732&r1=281731&r2=281732&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/ScalarEvolution.h (original)
+++ llvm/trunk/include/llvm/Analysis/ScalarEvolution.h Fri Sep 16 09:38:13 2016
@@ -803,6 +803,13 @@ namespace llvm {
     /// Cache for \c loopHasNoAbnormalExits.
     DenseMap<const Loop *, bool> LoopHasNoAbnormalExits;

+    /// Cache for \c loopHasNoSideEffects.
+    DenseMap<const Loop *, bool> LoopHasNoSideEffects;
+
+    /// Returns true if \p L contains no instruction that can have side effects
+    /// (i.e. via throwing an exception, volatile or atomic access).
+    bool loopHasNoSideEffects(const Loop *L);
+
     /// Returns true if \p L contains no instruction that can abnormally exit
     /// the loop (i.e. via throwing an exception, by terminating the thread
     /// cleanly or by infinite looping in a called function).  Strictly

Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=281732&r1=281731&r2=281732&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original)
+++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Fri Sep 16 09:38:13 2016
@@ -4953,6 +4953,28 @@ bool ScalarEvolution::isAddRecNeverPoiso
   return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);
 }

+bool ScalarEvolution::loopHasNoSideEffects(const Loop *L) {
+  auto Itr = LoopHasNoSideEffects.find(L);
+  if (Itr == LoopHasNoSideEffects.end()) {
+    auto NoSideEffectsInBB = [&](BasicBlock *BB) {
+      return all_of(*BB, [](Instruction &I) {
+        // Non-atomic, non-volatile stores are ok.
+        if (auto *SI = dyn_cast<StoreInst>(&I))
+          return SI->isSimple();


A loop which stores to a global variable has no side effects?

+
+        return !I.mayHaveSideEffects();
+      });
+    };
+
+    auto InsertPair = LoopHasNoSideEffects.insert(
+        {L, all_of(L->getBlocks(), NoSideEffectsInBB)});
+    assert(InsertPair.second && "We just checked!");
+    Itr = InsertPair.first;
+  }
+
+  return Itr->second;
+}
+
 bool ScalarEvolution::loopHasNoAbnormalExits(const Loop *L) {
   auto Itr = LoopHasNoAbnormalExits.find(L);
   if (Itr == LoopHasNoAbnormalExits.end()) {
@@ -5540,6 +5562,7 @@ void ScalarEvolution::forgetLoop(const L
     forgetLoop(I);

   LoopHasNoAbnormalExits.erase(L);
+  LoopHasNoSideEffects.erase(L);
 }

 void ScalarEvolution::forgetValue(Value *V) {
@@ -8614,6 +8637,8 @@ bool ScalarEvolution::isImpliedCondOpera

 bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
                                          bool IsSigned, bool NoWrap) {
+  assert(isKnownPositive(Stride) && "Positive stride expected!");
+
   if (NoWrap) return false;

   unsigned BitWidth = getTypeSizeInBits(RHS->getType());
@@ -8682,11 +8707,15 @@ ScalarEvolution::howManyLessThans(const
     return getCouldNotCompute();

   const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
-  if (!IV && AllowPredicates)
+  bool PredicatedIV = false;
+
+  if (!IV && AllowPredicates) {
     // Try to make this an AddRec using runtime tests, in the first X
     // iterations of this loop, where X is the SCEV expression found by the
     // algorithm below.
     IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
+    PredicatedIV = true;
+  }

   // Avoid weird loops
   if (!IV || IV->getLoop() != L || !IV->isAffine())
@@ -8697,15 +8726,62 @@ ScalarEvolution::howManyLessThans(const

   const SCEV *Stride = IV->getStepRecurrence(*this);

-  // Avoid negative or zero stride values
-  if (!isKnownPositive(Stride))
-    return getCouldNotCompute();
+  bool PositiveStride = isKnownPositive(Stride);

-  // Avoid proven overflow cases: this will ensure that the backedge taken count
-  // will not generate any unsigned overflow. Relaxed no-overflow conditions
-  // exploit NoWrapFlags, allowing to optimize in presence of undefined
-  // behaviors like the case of C language.
-  if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+  // Avoid negative or zero stride values.
+  if (!PositiveStride) {
+    // We can compute the correct backedge taken count for loops with unknown
+    // strides if we can prove that the loop is not an infinite loop with side
+    // effects. Here's the loop structure we are trying to handle -
+    //
+    // i = start
+    // do {
+    //   A[i] = i;
+    //   i += s;
+    // } while (i < end);
+    //
+    // The backedge taken count for such loops is evaluated as -
+    // (max(end, start + stride) - start - 1) /u stride
+    //
+    // The additional preconditions that we need to check to prove correctness
+    // of the above formula are as follows -
+    //
+    // a) IV is either nuw or nsw depending upon signedness (indicated by the
+    //    NoWrap flag).
+    // b) loop is single exit with no side effects.
+    //
+    //
+    // Precondition a) implies that if the stride is negative, this is a single
+    // trip loop. The backedge taken count formula reduces to zero in this case.
+    //
+    // Precondition b) implies that the unknown stride cannot be zero otherwise
+    // we have UB.
+    //
+    // The positive stride case is the same as isKnownPositive(Stride) returning
+    // true (original behavior of the function).
+    //
+    // We want to make sure that the stride is truly unknown as there are edge
+    // cases where ScalarEvolution propagates no wrap flags to the
+    // post-increment/decrement IV even though the increment/decrement operation
+    // itself is wrapping. The computed backedge taken count may be wrong in
+    // such cases. This is prevented by checking that the stride is not known to
+    // be either positive or non-positive. For example, no wrap flags are
+    // propagated to the post-increment IV of this loop with a trip count of 2 -
+    //
+    // unsigned char i;
+    // for(i=127; i<128; i+=129)
+    //   A[i] = i;
+    //
+    if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||
+        !loopHasNoSideEffects(L))
+      return getCouldNotCompute();
+
+  } else if (!Stride->isOne() &&
+             doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+    // Avoid proven overflow cases: this will ensure that the backedge taken
+    // count will not generate any unsigned overflow. Relaxed no-overflow
+    // conditions exploit NoWrapFlags, allowing to optimize in presence of
+    // undefined behaviors like the case of C language.
     return getCouldNotCompute();

   ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
@@ -8720,12 +8796,21 @@ ScalarEvolution::howManyLessThans(const
   APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
                             : getUnsignedRange(Start).getUnsignedMin();

-  APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
-                             : getUnsignedRange(Stride).getUnsignedMin();
-
   unsigned BitWidth = getTypeSizeInBits(LHS->getType());
-  APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
-                         : APInt::getMaxValue(BitWidth) - (MinStride - 1);
+
+  APInt StrideForMaxBECount;
+
+  if (PositiveStride)
+    StrideForMaxBECount = IsSigned ? getSignedRange(Stride).getSignedMin()
+                                   : getUnsignedRange(Stride).getUnsignedMin();
+  else
+    // Using a stride of 1 is safe when computing max backedge taken count for
+    // a loop with unknown stride.
+    StrideForMaxBECount = APInt(BitWidth, 1, IsSigned);
+
+  APInt Limit =
+      IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1)
+               : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1);

   // Although End can be a MAX expression we estimate MaxEnd considering only
   // the case End = RHS. This is safe because in the other case (End - Start)
@@ -8739,7 +8824,7 @@ ScalarEvolution::howManyLessThans(const
     MaxBECount = BECount;
   else
     MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
-                                getConstant(MinStride), false);
+                                getConstant(StrideForMaxBECount), false);

   if (isa<SCEVCouldNotCompute>(MaxBECount))
     MaxBECount = BECount;

Added: llvm/trunk/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll?rev=281732&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll (added)
+++ llvm/trunk/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll Fri Sep 16 09:38:13 2016
@@ -0,0 +1,62 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+; ScalarEvolution should be able to compute trip count of the loop by proving
+; that this is not an infinite loop with side effects.
+
+; CHECK: Determining loop execution counts for: @foo1
+; CHECK: backedge-taken count is ((-1 + %n) /u %s)
+
+; We should have a conservative estimate for the max backedge taken count for
+; loops with unknown stride.
+; CHECK: max backedge-taken count is -1
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+
+; Function Attrs: norecurse nounwind
+define void @foo1(i32* nocapture %A, i32 %n, i32 %s) #0 {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.05, %s
+  %cmp = icmp slt i32 %add, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+; Check that we are able to compute trip count of a loop without an entry guard.
+; CHECK: Determining loop execution counts for: @foo2
+; CHECK: backedge-taken count is ((-1 + (%n smax %s)) /u %s)
+
+; We should have a conservative estimate for the max backedge taken count for
+; loops with unknown stride.
+; CHECK: max backedge-taken count is -1
+
+; Function Attrs: norecurse nounwind
+define void @foo2(i32* nocapture %A, i32 %n, i32 %s) #0 {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05
+  %0 = load i32, i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.05, %s
+  %cmp = icmp slt i32 %add, %n
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+


_______________________________________________
llvm-commits mailing list
llvm-commits at lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
