[llvm] 38540d7 - [SCEV] Compute exit counts for unsigned IVs using mustprogress semantics

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 7 11:24:24 PDT 2021


Author: Philip Reames
Date: 2021-06-07T11:24:00-07:00
New Revision: 38540d71c74c2f63a77993e7bfb6e52e4b0da0fc

URL: https://github.com/llvm/llvm-project/commit/38540d71c74c2f63a77993e7bfb6e52e4b0da0fc
DIFF: https://github.com/llvm/llvm-project/commit/38540d71c74c2f63a77993e7bfb6e52e4b0da0fc.diff

LOG: [SCEV] Compute exit counts for unsigned IVs using mustprogress semantics

The motivation here is simple loops with unsigned induction variables and steps other than one. A toy example would be:
for (unsigned i = 0; i < N; i += 2) { body; }

Given C/C++ semantics, we do not get the nuw flag on the induction variable; lacking that flag, we currently can't compute a bound for this loop. We can do better in many cases, depending on the contents of "body".
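
To see why the flag is missing (a minimal illustration, not from the patch): unsigned wrap is well defined in C, so the loop below is infinite without ever performing undefined arithmetic, and the frontend therefore cannot tag the increment nuw:

  /* Illustrative only: i steps through 0, 2, ..., 4294967294, then wraps
     back to 0; the comparison stays true forever, yet every operation is
     well defined under C's unsigned arithmetic rules. */
  for (unsigned i = 0; i < 4294967295u; i += 2) { }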

The basic intuition behind this patch is as follows:
* A step which evenly divides the iteration space must, once the IV wraps, revisit the same values repeatedly (see the worked example after this list). Thus we can ignore potential corner cases where we exit after the n-th wrap through uint32_max.
* Per C++ rules, infinite loops without side effects are UB. We already have code in SCEV which relies on this. In LLVM, this fact is tied to the mustprogress attribute.
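
To make the first bullet concrete (numbers chosen purely for illustration): for a 32-bit IV starting at 0, a stride of 2 evenly divides the 2^32-value iteration space, so after a wrap the IV revisits exactly the set {0, 2, 4, ...}. A stride of 3 does not divide 2^32 evenly (2^32 mod 3 == 1), so after the first wrap the IV lands on 2, 5, 8, ... instead of repeating itself, and the revisiting argument breaks down (compare @test_uneven_divide in the added test).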

Together, these let us conclude that the loop must exit before the IV overflows unless the body would form a well-defined infinite loop.
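
As an illustrative counterexample (mirroring @test_well_defined_infinite_st in the added test; the C body here is a sketch, not from the patch), a volatile access is enough to make an infinite loop well defined, so the conclusion above does not hold and SCEV must still return CouldNotCompute:

  extern volatile int G;
  void f(unsigned N) {
    /* The volatile store is observable behavior, so this loop may
       legitimately run forever even in a mustprogress function; IV
       overflow can no longer be dismissed as UB. */
    for (unsigned i = 0; i < N; i += 2) { G = 0; }
  }

When the body is side-effect free, by contrast, the patch lets SCEV bound the toy loop; @test in the added file checks a backedge-taken count of ((-1 + (2 umax %N)) /u 2).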

A couple notes for those reading along:
* I reused the loop properties code, which is overly conservative for this case. I may follow up in another patch to generalize it for the actual UB rules.
* We could cache the n(s/u)w facts. I left that out because a pre-patch that merely cached existing inference showed a lot of diffs I had trouble fully explaining. I plan to get back to this, but I don't want it on the critical path.

Differential Revision: https://reviews.llvm.org/D103118

Added: 
    llvm/test/Analysis/ScalarEvolution/lt-overflow.ll

Modified: 
    llvm/include/llvm/Analysis/ScalarEvolution.h
    llvm/lib/Analysis/ScalarEvolution.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 68406c90dd27e..f78f00145108d 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1542,6 +1542,10 @@ class ScalarEvolution {
     return getLoopProperties(L).HasNoAbnormalExits;
   }
 
+  /// Return true if this loop is finite by assumption.  That is,
+  /// to be infinite, it must also be undefined.
+  bool loopIsFiniteByAssumption(const Loop *L);
+
   /// Compute a LoopDisposition value.
   LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L);
 

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 7a8369b9ea013..273b2ec632202 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6573,6 +6573,17 @@ ScalarEvolution::getLoopProperties(const Loop *L) {
   return Itr->second;
 }
 
+bool ScalarEvolution::loopIsFiniteByAssumption(const Loop *L) {
+  // TODO: Use the loop metadata form of mustprogress as well.
+  if (!L->getHeader()->getParent()->mustProgress())
+    return false;
+
+  // A loop without side effects must be finite.
+  // TODO: The check used here is very conservative.  It's only *specific*
+  // side effects which are well defined in infinite loops.
+  return loopHasNoSideEffects(L);
+}
+
 const SCEV *ScalarEvolution::createSCEV(Value *V) {
   if (!isSCEVable(V->getType()))
     return getUnknown(V);
@@ -11462,11 +11473,45 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
         !loopHasNoSideEffects(L))
       return getCouldNotCompute();
   } else if (!Stride->isOne() && !NoWrap) {
+    auto isUBOnWrap = [&]() {
+      // Can we prove this loop *must* be UB if overflow of IV occurs?
+      // Reasoning goes as follows:
+      // * Suppose the IV did self wrap.
+      // * If Stride evenly divides the iteration space, then once wrap
+      //   occurs, the loop must revisit the same values.
+      // * We know that RHS is invariant, and that none of those values
+      //   caused this exit to be taken previously.  Thus, this exit is
+      //   dynamically dead.
+      // * If this is the sole exit, then a dead exit implies the loop
+      //   must be infinite if there are no abnormal exits.
+      // * If the loop were infinite, then it must either not be mustprogress
+      //   or have side effects. Otherwise, it must be UB.
+      // * It can't (by assumption), be UB so we have contradicted our
+      //   premise and can conclude the IV did not in fact self-wrap.
+      // From no-self-wrap, we then need to prove no-(un)signed-wrap.  This
+      // follows from the fact that every (un)signed-wrapped, but not
+      // self-wrapped, value must be less than the last value before the
+      // (un)signed wrap; since we know that last value didn't take this
+      // exit, neither will any smaller one.
+
+      if (!isLoopInvariant(RHS, L))
+        return false;
+
+      auto *StrideC = dyn_cast<SCEVConstant>(Stride);
+      if (!StrideC || !StrideC->getAPInt().isPowerOf2())
+        return false;
+
+      if (!ControlsExit || !loopHasNoAbnormalExits(L))
+        return false;
+
+      return loopIsFiniteByAssumption(L);
+    };
+
     // Avoid proven overflow cases: this will ensure that the backedge taken
     // count will not generate any unsigned overflow. Relaxed no-overflow
     // conditions exploit NoWrapFlags, allowing to optimize in presence of
     // undefined behaviors like the case of C language.
-    if (canIVOverflowOnLT(RHS, Stride, IsSigned))
+    if (canIVOverflowOnLT(RHS, Stride, IsSigned) && !isUBOnWrap())
       return getCouldNotCompute();
   }
 

diff --git a/llvm/test/Analysis/ScalarEvolution/lt-overflow.ll b/llvm/test/Analysis/ScalarEvolution/lt-overflow.ll
new file mode 100644
index 0000000000000..3d1c565821579
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/lt-overflow.ll
@@ -0,0 +1,187 @@
+
+; RUN: opt %s -analyze -scalar-evolution -enable-new-pm=0 -scalar-evolution-classify-expressions=0 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; A collection of tests focused on exercising logic to prove no-unsigned wrap
+; from mustprogress semantics of loops.
+
+; CHECK: Determining loop execution counts for: @test
+; CHECK: Loop %for.body: backedge-taken count is ((-1 + (2 umax %N)) /u 2)
+; CHECK: Determining loop execution counts for: @test_preinc
+; CHECK: Loop %for.body: backedge-taken count is ((1 + %N) /u 2)
+; CHECK: Determining loop execution counts for: @test_well_defined_infinite_st
+; CHECK: Loop %for.body: Unpredictable backedge-taken count.
+; CHECK: Determining loop execution counts for: @test_well_defined_infinite_ld
+; CHECK: Loop %for.body: Unpredictable backedge-taken count.
+; CHECK: Determining loop execution counts for: @test_no_mustprogress
+; CHECK: Loop %for.body: Unpredictable backedge-taken count.
+; CHECK: Determining loop execution counts for: @test_1024
+; CHECK: Loop %for.body: backedge-taken count is ((-1 + (1024 umax %N)) /u 1024)
+; CHECK: Determining loop execution counts for: @test_uneven_divide
+; CHECK: Loop %for.body: Unpredictable backedge-taken count.
+; CHECK: Determining loop execution counts for: @test_non_invariant_rhs
+; CHECK: Loop %for.body: Unpredictable backedge-taken count.
+; CHECK: Determining loop execution counts for: @test_abnormal_exit
+; CHECK: Loop %for.body: Unpredictable backedge-taken count.
+; CHECK: Determining loop execution counts for: @test_other_exit
+; CHECK: Loop %for.body: <multiple exits> Unpredictable backedge-taken count.
+
+define void @test(i32 %N) mustprogress {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
+  %iv.next = add i32 %iv, 2
+  %cmp = icmp ult i32 %iv.next, %N
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_preinc(i32 %N) mustprogress {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
+  %iv.next = add i32 %iv, 2
+  %cmp = icmp ult i32 %iv, %N
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+}
+
+@G = external global i32
+
+define void @test_well_defined_infinite_st(i32 %N) mustprogress {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
+  %iv.next = add i32 %iv, 2
+  store volatile i32 0, i32* @G
+  %cmp = icmp ult i32 %iv.next, %N
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_well_defined_infinite_ld(i32 %N) mustprogress {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
+  %iv.next = add i32 %iv, 2
+  %val = load volatile i32, i32* @G
+  %cmp = icmp ult i32 %iv.next, %N
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_no_mustprogress(i32 %N) {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
+  %iv.next = add i32 %iv, 2
+  %cmp = icmp ult i32 %iv.next, %N
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+}
+
+
+define void @test_1024(i32 %N) mustprogress {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
+  %iv.next = add i32 %iv, 1024
+  %cmp = icmp ult i32 %iv.next, %N
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_uneven_divide(i32 %N) mustprogress {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
+  %iv.next = add i32 %iv, 3
+  %cmp = icmp ult i32 %iv.next, %N
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_non_invariant_rhs() mustprogress {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
+  %iv.next = add i32 %iv, 2
+  %N = load i32, i32* @G
+  %cmp = icmp ult i32 %iv.next, %N
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}
+
+declare void @mayexit()
+
+define void @test_abnormal_exit(i32 %N) mustprogress {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
+  %iv.next = add i32 %iv, 2
+  call void @mayexit()
+  %cmp = icmp ult i32 %iv.next, %N
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}
+
+
+define void @test_other_exit(i32 %N) mustprogress {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ %iv.next, %for.latch ], [ 0, %entry ]
+  %iv.next = add i32 %iv, 2
+  %cmp1 = icmp ult i32 %iv.next, 20
+  br i1 %cmp1, label %for.latch, label %for.cond.cleanup
+
+for.latch:
+  %cmp2 = icmp ult i32 %iv.next, %N
+  br i1 %cmp2, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+}
+
+



