<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Sep 16, 2016 at 7:38 AM, David L Kreitzer via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: dlkreitz<br>

Date: Fri Sep 16 09:38:13 2016<br>

New Revision: 281732<br>

<br>

URL: <a href="http://llvm.org/viewvc/llvm-project?rev=281732&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=281732&view=rev</a><br>

Log:<br>

Reapplying r278731 after fixing the problem that caused it to be reverted.<br>

<br>

Enhance SCEV to compute the trip count for some loops with unknown stride.<br>

<br>

Patch by Pankaj Chawla<br>

<br>

Differential Revision: <a href="https://reviews.llvm.org/D22377" rel="noreferrer" target="_blank">https://reviews.llvm.org/<wbr>D22377</a><br>

<br>

Added:<br>

    llvm/trunk/test/Analysis/<wbr>ScalarEvolution/trip-count-<wbr>unknown-stride.ll<br>

Modified:<br>

    llvm/trunk/include/llvm/<wbr>Analysis/ScalarEvolution.h<br>

    llvm/trunk/lib/Analysis/<wbr>ScalarEvolution.cpp<br>

<br>

Modified: llvm/trunk/include/llvm/<wbr>Analysis/ScalarEvolution.h<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/ScalarEvolution.h?rev=281732&r1=281731&r2=281732&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/include/<wbr>llvm/Analysis/ScalarEvolution.<wbr>h?rev=281732&r1=281731&r2=<wbr>281732&view=diff</a><br>

==============================<wbr>==============================<wbr>==================<br>

--- llvm/trunk/include/llvm/<wbr>Analysis/ScalarEvolution.h (original)<br>

+++ llvm/trunk/include/llvm/<wbr>Analysis/ScalarEvolution.h Fri Sep 16 09:38:13 2016<br>

@@ -803,6 +803,13 @@ namespace llvm {<br>

     /// Cache for \c loopHasNoAbnormalExits.<br>

     DenseMap<const Loop *, bool> LoopHasNoAbnormalExits;<br>

<br>

+    /// Cache for \c loopHasNoSideEffects.<br>

+    DenseMap<const Loop *, bool> LoopHasNoSideEffects;<br>

+<br>

+    /// Returns true if \p L contains no instruction that can have side effects<br>

+    /// (i.e. via throwing an exception, volatile or atomic access).<br>

+    bool loopHasNoSideEffects(const Loop *L);<br>

+<br>

     /// Returns true if \p L contains no instruction that can abnormally exit<br>

     /// the loop (i.e. via throwing an exception, by terminating the thread<br>

     /// cleanly or by infinite looping in a called function).  Strictly<br>

<br>

Modified: llvm/trunk/lib/Analysis/<wbr>ScalarEvolution.cpp<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=281732&r1=281731&r2=281732&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/<wbr>Analysis/ScalarEvolution.cpp?<wbr>rev=281732&r1=281731&r2=<wbr>281732&view=diff</a><br>

==============================<wbr>==============================<wbr>==================<br>

--- llvm/trunk/lib/Analysis/<wbr>ScalarEvolution.cpp (original)<br>

+++ llvm/trunk/lib/Analysis/<wbr>ScalarEvolution.cpp Fri Sep 16 09:38:13 2016<br>

@@ -4953,6 +4953,28 @@ bool ScalarEvolution::<wbr>isAddRecNeverPoiso<br>

   return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);<br>

 }<br>

<br>

+bool ScalarEvolution::<wbr>loopHasNoSideEffects(const Loop *L) {<br>

+  auto Itr = LoopHasNoSideEffects.find(L);<br>

+  if (Itr == LoopHasNoSideEffects.end()) {<br>

+    auto NoSideEffectsInBB = [&](BasicBlock *BB) {<br>

+      return all_of(*BB, [](Instruction &I) {<br>

+        // Non-atomic, non-volatile stores are ok.<br>

+        if (auto *SI = dyn_cast<StoreInst>(&I))<br>

+          return SI->isSimple();<br></blockquote><div><br></div><div>A loop which stores to a global variable has no side effects?</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

+<br>

+        return !I.mayHaveSideEffects();<br>

+      });<br>

+    };<br>

+<br>

+    auto InsertPair = LoopHasNoSideEffects.insert(<br>

+        {L, all_of(L->getBlocks(), NoSideEffectsInBB)});<br>

+    assert(InsertPair.second && "We just checked!");<br>

+    Itr = InsertPair.first;<br>

+  }<br>

+<br>

+  return Itr->second;<br>

+}<br>

+<br>

 bool ScalarEvolution::<wbr>loopHasNoAbnormalExits(const Loop *L) {<br>

   auto Itr = LoopHasNoAbnormalExits.find(L)<wbr>;<br>

   if (Itr == LoopHasNoAbnormalExits.end()) {<br>

@@ -5540,6 +5562,7 @@ void ScalarEvolution::forgetLoop(<wbr>const L<br>

     forgetLoop(I);<br>

<br>

   LoopHasNoAbnormalExits.erase(<wbr>L);<br>

+  LoopHasNoSideEffects.erase(L);<br>

 }<br>

<br>

 void ScalarEvolution::forgetValue(<wbr>Value *V) {<br>

@@ -8614,6 +8637,8 @@ bool ScalarEvolution::<wbr>isImpliedCondOpera<br>

<br>

 bool ScalarEvolution::<wbr>doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,<br>

                                          bool IsSigned, bool NoWrap) {<br>

+  assert(isKnownPositive(Stride) && "Positive stride expected!");<br>

+<br>

   if (NoWrap) return false;<br>

<br>

   unsigned BitWidth = getTypeSizeInBits(RHS-><wbr>getType());<br>

@@ -8682,11 +8707,15 @@ ScalarEvolution::<wbr>howManyLessThans(const<br>

     return getCouldNotCompute();<br>

<br>

   const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);<br>

-  if (!IV && AllowPredicates)<br>

+  bool PredicatedIV = false;<br>

+<br>

+  if (!IV && AllowPredicates) {<br>

     // Try to make this an AddRec using runtime tests, in the first X<br>

     // iterations of this loop, where X is the SCEV expression found by the<br>

     // algorithm below.<br>

     IV = convertSCEVToAddRecWithPredica<wbr>tes(LHS, L, P);<br>

+    PredicatedIV = true;<br>

+  }<br>

<br>

   // Avoid weird loops<br>

   if (!IV || IV->getLoop() != L || !IV->isAffine())<br>

@@ -8697,15 +8726,62 @@ ScalarEvolution::<wbr>howManyLessThans(const<br>

<br>

   const SCEV *Stride = IV->getStepRecurrence(*this);<br>

<br>

-  // Avoid negative or zero stride values<br>

-  if (!isKnownPositive(Stride))<br>

-    return getCouldNotCompute();<br>

+  bool PositiveStride = isKnownPositive(Stride);<br>

<br>

-  // Avoid proven overflow cases: this will ensure that the backedge taken count<br>

-  // will not generate any unsigned overflow. Relaxed no-overflow conditions<br>

-  // exploit NoWrapFlags, allowing to optimize in presence of undefined<br>

-  // behaviors like the case of C language.<br>

-  if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))<br>

+  // Avoid negative or zero stride values.<br>

+  if (!PositiveStride) {<br>

+    // We can compute the correct backedge taken count for loops with unknown<br>

+    // strides if we can prove that the loop is not an infinite loop with side<br>

+    // effects. Here's the loop structure we are trying to handle -<br>

+    //<br>

+    // i = start<br>

+    // do {<br>

+    //   A[i] = i;<br>

+    //   i += s;<br>

+    // } while (i < end);<br>

+    //<br>

+    // The backedge taken count for such loops is evaluated as -<br>

+    // (max(end, start + stride) - start - 1) /u stride<br>

+    //<br>

+    // The additional preconditions that we need to check to prove correctness<br>

+    // of the above formula are as follows -<br>

+    //<br>

+    // a) IV is either nuw or nsw depending upon signedness (indicated by the<br>

+    //    NoWrap flag).<br>

+    // b) loop is single exit with no side effects.<br>

+    //<br>

+    //<br>

+    // Precondition a) implies that if the stride is negative, this is a single<br>

+    // trip loop. The backedge taken count formula reduces to zero in this case.<br>

+    //<br>

+    // Precondition b) implies that the unknown stride cannot be zero, otherwise<br>

+    // we have UB.<br>

+    //<br>

+    // The positive stride case is the same as isKnownPositive(Stride) returning<br>

+    // true (original behavior of the function).<br>

+    //<br>

+    // We want to make sure that the stride is truly unknown as there are edge<br>

+    // cases where ScalarEvolution propagates no wrap flags to the<br>

+    // post-increment/decrement IV even though the increment/decrement operation<br>

+    // itself is wrapping. The computed backedge taken count may be wrong in<br>

+    // such cases. This is prevented by checking that the stride is not known to<br>

+    // be either positive or non-positive. For example, no wrap flags are<br>

+    // propagated to the post-increment IV of this loop with a trip count of 2 -<br>

+    //<br>

+    // unsigned char i;<br>

+    // for(i=127; i<128; i+=129)<br>

+    //   A[i] = i;<br>

+    //<br>

+    if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||<br>

+        !loopHasNoSideEffects(L))<br>

+      return getCouldNotCompute();<br>

+<br>

+  } else if (!Stride->isOne() &&<br>

+             doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))<br>

+    // Avoid proven overflow cases: this will ensure that the backedge taken<br>

+    // count will not generate any unsigned overflow. Relaxed no-overflow<br>

+    // conditions exploit NoWrapFlags, allowing optimization in the presence of<br>

+    // undefined behaviors like the case of C language.<br>

     return getCouldNotCompute();<br>

<br>

   ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT<br>

@@ -8720,12 +8796,21 @@ ScalarEvolution::<wbr>howManyLessThans(const<br>

   APInt MinStart = IsSigned ? getSignedRange(Start).<wbr>getSignedMin()<br>

                             : getUnsignedRange(Start).<wbr>getUnsignedMin();<br>

<br>

-  APInt MinStride = IsSigned ? getSignedRange(Stride).<wbr>getSignedMin()<br>

-                             : getUnsignedRange(Stride).<wbr>getUnsignedMin();<br>

-<br>

   unsigned BitWidth = getTypeSizeInBits(LHS-><wbr>getType());<br>

-  APInt Limit = IsSigned ? APInt::getSignedMaxValue(<wbr>BitWidth) - (MinStride - 1)<br>

-                         : APInt::getMaxValue(BitWidth) - (MinStride - 1);<br>

+<br>

+  APInt StrideForMaxBECount;<br>

+<br>

+  if (PositiveStride)<br>

+    StrideForMaxBECount = IsSigned ? getSignedRange(Stride).<wbr>getSignedMin()<br>

+                                   : getUnsignedRange(Stride).<wbr>getUnsignedMin();<br>

+  else<br>

+    // Using a stride of 1 is safe when computing max backedge taken count for<br>

+    // a loop with unknown stride.<br>

+    StrideForMaxBECount = APInt(BitWidth, 1, IsSigned);<br>

+<br>

+  APInt Limit =<br>

+      IsSigned ? APInt::getSignedMaxValue(<wbr>BitWidth) - (StrideForMaxBECount - 1)<br>

+               : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1);<br>

<br>

   // Although End can be a MAX expression we estimate MaxEnd considering only<br>

   // the case End = RHS. This is safe because in the other case (End - Start)<br>

@@ -8739,7 +8824,7 @@ ScalarEvolution::<wbr>howManyLessThans(const<br>

     MaxBECount = BECount;<br>

   else<br>

     MaxBECount = computeBECount(getConstant(<wbr>MaxEnd - MinStart),<br>

-                                getConstant(MinStride), false);<br>

+                                getConstant(<wbr>StrideForMaxBECount), false);<br>

<br>

   if (isa<SCEVCouldNotCompute>(<wbr>MaxBECount))<br>

     MaxBECount = BECount;<br>

<br>

Added: llvm/trunk/test/Analysis/<wbr>ScalarEvolution/trip-count-<wbr>unknown-stride.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll?rev=281732&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>Analysis/ScalarEvolution/trip-<wbr>count-unknown-stride.ll?rev=<wbr>281732&view=auto</a><br>

==============================<wbr>==============================<wbr>==================<br>

--- llvm/trunk/test/Analysis/<wbr>ScalarEvolution/trip-count-<wbr>unknown-stride.ll (added)<br>

+++ llvm/trunk/test/Analysis/<wbr>ScalarEvolution/trip-count-<wbr>unknown-stride.ll Fri Sep 16 09:38:13 2016<br>

@@ -0,0 +1,62 @@<br>

+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s<br>

+<br>

+; ScalarEvolution should be able to compute trip count of the loop by proving<br>

+; that this is not an infinite loop with side effects.<br>

+<br>

+; CHECK: Determining loop execution counts for: @foo1<br>

+; CHECK: backedge-taken count is ((-1 + %n) /u %s)<br>

+<br>

+; We should have a conservative estimate for the max backedge taken count for<br>

+; loops with unknown stride.<br>

+; CHECK: max backedge-taken count is -1<br>

+<br>

+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:<wbr>32-n8:16:32-S128"<br>

+<br>

+; Function Attrs: norecurse nounwind<br>

+define void @foo1(i32* nocapture %A, i32 %n, i32 %s) #0 {<br>

+entry:<br>

+  %cmp4 = icmp sgt i32 %n, 0<br>

+  br i1 %cmp4, label %for.body, label %for.end<br>

+<br>

+for.body:                                         ; preds = %entry, %for.body<br>

+  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]<br>

+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05<br>

+  %0 = load i32, i32* %arrayidx, align 4<br>

+  %inc = add nsw i32 %0, 1<br>

+  store i32 %inc, i32* %arrayidx, align 4<br>

+  %add = add nsw i32 %i.05, %s<br>

+  %cmp = icmp slt i32 %add, %n<br>

+  br i1 %cmp, label %for.body, label %for.end<br>

+<br>

+for.end:                                          ; preds = %for.body, %entry<br>

+  ret void<br>

+}<br>

+<br>

+<br>

+; Check that we are able to compute trip count of a loop without an entry guard.<br>

+; CHECK: Determining loop execution counts for: @foo2<br>

+; CHECK: backedge-taken count is ((-1 + (%n smax %s)) /u %s)<br>

+<br>

+; We should have a conservative estimate for the max backedge taken count for<br>

+; loops with unknown stride.<br>

+; CHECK: max backedge-taken count is -1<br>

+<br>

+; Function Attrs: norecurse nounwind<br>

+define void @foo2(i32* nocapture %A, i32 %n, i32 %s) #0 {<br>

+entry:<br>

+  br label %for.body<br>

+<br>

+for.body:                                         ; preds = %entry, %for.body<br>

+  %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]<br>

+  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.05<br>

+  %0 = load i32, i32* %arrayidx, align 4<br>

+  %inc = add nsw i32 %0, 1<br>

+  store i32 %inc, i32* %arrayidx, align 4<br>

+  %add = add nsw i32 %i.05, %s<br>

+  %cmp = icmp slt i32 %add, %n<br>

+  br i1 %cmp, label %for.body, label %for.end<br>

+<br>

+for.end:                                          ; preds = %for.body, %entry<br>

+  ret void<br>

+}<br>

+<br>

<br>

<br>

______________________________<wbr>_________________<br>

llvm-commits mailing list<br>

<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>

<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>

</blockquote></div><br></div></div>