[llvm] 32b72c3 - Recommit "[SCEV] Use nw flag and symbolic iteration count to sharpen ranges of AddRecs"

Fri Oct 16 03:36:11 PDT 2020

Author: Max Kazantsev
Date: 2020-10-16T17:35:13+07:00
New Revision: 32b72c3165bf65cca2e8e6197b59eb4c4b60392a

URL: https://github.com/llvm/llvm-project/commit/32b72c3165bf65cca2e8e6197b59eb4c4b60392a
DIFF: https://github.com/llvm/llvm-project/commit/32b72c3165bf65cca2e8e6197b59eb4c4b60392a.diff

LOG: Recommit "[SCEV] Use nw flag and symbolic iteration count to sharpen ranges of AddRecs"

It was reverted because of negative compile time impact. In this version,
less powerful proof methods are used (non-recursive reasoning only), and
scope limited to constant End values to avoid explision of complex proofs.

Differential Revision: https://reviews.llvm.org/D89381

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/ScalarEvolution.h
    llvm/lib/Analysis/ScalarEvolution.cpp
    llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
    llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll
    llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 4581df6428af..2b905e4510bd 100644

--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1489,6 +1489,13 @@ class ScalarEvolution {
   ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop,
                                     const SCEV *MaxBECount, unsigned BitWidth);
 
+  /// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p
+  /// Start,+,\p Stop}<nw>.
+  ConstantRange getRangeForAffineNoSelfWrappingAR(const SCEVAddRecExpr *AddRec,
+                                                  const SCEV *MaxBECount,
+                                                  unsigned BitWidth,
+                                                  RangeSignHint SignHint);
+
   /// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p
   /// Stop} by "factoring out" a ternary expression from the add recurrence.
   /// Helper called by \c getRange.

diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 9d5a842058d9..68d736020300 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -5509,6 +5509,17 @@ ScalarEvolution::getRangeRef(const SCEV *S,
         ConservativeResult =
             ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
       }
+
+      // Now try symbolic BE count and more powerful methods.
+      MaxBECount = computeMaxBackedgeTakenCount(AddRec->getLoop());
+      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
+          AddRec->hasNoSelfWrap()) {
+        auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR(
+            AddRec, MaxBECount, BitWidth, SignHint);
+        ConservativeResult =
+            ConservativeResult.intersectWith(RangeFromAffineNew, RangeType);
+      }
     }
 
     return setRange(AddRec, SignHint, std::move(ConservativeResult));
@@ -5678,6 +5689,67 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
   return SR.intersectWith(UR, ConstantRange::Smallest);
 }
 
+ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
+    const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth,
+    ScalarEvolution::RangeSignHint SignHint) {
+  assert(AddRec->isAffine() && "Non-affine AddRecs are not suppored!\n");
+  assert(AddRec->hasNoSelfWrap() &&
+         "This only works for non-self-wrapping AddRecs!");
+  const bool IsSigned = SignHint == HINT_RANGE_SIGNED;
+  const SCEV *Step = AddRec->getStepRecurrence(*this);
+  // Let's make sure that we can prove that we do not self-wrap during
+  // MaxBECount iterations. We need this because MaxBECount is a maximum
+  // iteration count estimate, and we might infer nw from some exit for which we
+  // do not know max exit count (or any other side reasoning).
+  // TODO: Turn into assert at some point.
+  MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType());
+  const SCEV *RangeWidth = getNegativeSCEV(getOne(AddRec->getType()));
+  const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step));
+  const SCEV *MaxItersWithoutWrap = getUDivExpr(RangeWidth, StepAbs);
+  if (!isKnownPredicate(ICmpInst::ICMP_ULE, MaxBECount, MaxItersWithoutWrap))
+    return ConstantRange::getFull(BitWidth);
+
+  ICmpInst::Predicate LEPred =
+      IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+  ICmpInst::Predicate GEPred =
+      IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+  const SCEV *Start = AddRec->getStart();
+  const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
+  // We could handle non-constant End, but it harms compile time a lot.
+  if (!isa<SCEVConstant>(End))
+    return ConstantRange::getFull(BitWidth);
+
+  // We know that there is no self-wrap. Let's take Start and End values and
+  // look at all intermediate values V1, V2, ..., Vn that IndVar takes during
+  // the iteration. They either lie inside the range [Min(Start, End),
+  // Max(Start, End)] or outside it:
+  //
+  // Case 1:   RangeMin    ...    Start V1 ... VN End ...           RangeMax;
+  // Case 2:   RangeMin Vk ... V1 Start    ...    End Vn ... Vk + 1 RangeMax;
+  //
+  // No self wrap flag guarantees that the intermediate values cannot be BOTH
+  // outside and inside the range [Min(Start, End), Max(Start, End)]. Using that
+  // knowledge, let's try to prove that we are dealing with Case 1. It is so if
+  // Start <= End and step is positive, or Start >= End and step is negative.
+  ConstantRange StartRange =
+      IsSigned ? getSignedRange(Start) : getUnsignedRange(Start);
+  ConstantRange EndRange =
+      IsSigned ? getSignedRange(End) : getUnsignedRange(End);
+  ConstantRange RangeBetween = StartRange.unionWith(EndRange);
+  // If they already cover full iteration space, we will know nothing useful
+  // even if we prove what we want to prove.
+  if (RangeBetween.isFullSet())
+    return RangeBetween;
+
+  if (isKnownPositive(Step) &&
+      isKnownViaNonRecursiveReasoning(LEPred, Start, End))
+    return RangeBetween;
+  else if (isKnownNegative(Step) &&
+           isKnownViaNonRecursiveReasoning(GEPred, Start, End))
+    return RangeBetween;
+  return ConstantRange::getFull(BitWidth);
+}
+
 ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
                                                     const SCEV *Step,
                                                     const SCEV *MaxBECount,

diff  --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
index f9cb89c97161..bac6deeab202 100644
--- a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
+++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
@@ -7,7 +7,7 @@ define i32 @test_01(i32 %start, i32* %p, i32* %q) {
 ; CHECK-NEXT:    %0 = zext i32 %start to i64
 ; CHECK-NEXT:    --> (zext i32 %start to i64) U: [0,4294967296) S: [0,4294967296)
 ; CHECK-NEXT:    %indvars.iv = phi i64 [ %indvars.iv.next, %backedge ], [ %0, %entry ]
-; CHECK-NEXT:    --> {(zext i32 %start to i64),+,-1}<nsw><%loop> U: [-4294967295,4294967296) S: [-4294967295,4294967296) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(zext i32 %start to i64),+,-1}<nsw><%loop> U: [0,4294967296) S: [0,4294967296) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv = phi i32 [ %start, %entry ], [ %iv.next, %backedge ]
 ; CHECK-NEXT:    --> {%start,+,-1}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add i32 %iv, -1
@@ -21,7 +21,7 @@ define i32 @test_01(i32 %start, i32* %p, i32* %q) {
 ; CHECK-NEXT:    %stop = load i32, i32* %load.addr, align 4
 ; CHECK-NEXT:    --> %stop U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %indvars.iv.next = add nsw i64 %indvars.iv, -1
-; CHECK-NEXT:    --> {(-1 + (zext i32 %start to i64))<nsw>,+,-1}<nsw><%loop> U: [-4294967296,4294967295) S: [-4294967296,4294967295) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(-1 + (zext i32 %start to i64))<nsw>,+,-1}<nsw><%loop> U: [-4294967296,4294967295) S: [-1,4294967295) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_01
 ; CHECK-NEXT:  Loop %loop: <multiple exits> Unpredictable backedge-taken count.
 ; CHECK-NEXT:    exit count for loop: (zext i32 %start to i64)

diff  --git a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll
index 9fb7977c207f..4738369c0aca 100644
--- a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll
+++ b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll
@@ -474,7 +474,7 @@ define void @test_10(i32 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 90
 ; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 90
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[UMIN]], -99
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[UMIN]], -99
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ -100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

diff  --git a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
index 5cc288c58e68..090efa44d9c0 100644
--- a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
+++ b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
@@ -196,7 +196,7 @@ define void @promote_latch_condition_decrementing_loop_01(i32* %p, i32* %a) {
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ]
 ; CHECK-NEXT:    [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store atomic i32 0, i32* [[EL]] unordered, align 4
-; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:    br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;
@@ -241,7 +241,7 @@ define void @promote_latch_condition_decrementing_loop_02(i32* %p, i32* %a) {
 ; CHECK-NEXT:    [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store atomic i32 0, i32* [[EL]] unordered, align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
-; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;
 
@@ -285,7 +285,7 @@ define void @promote_latch_condition_decrementing_loop_03(i32* %p, i32* %a) {
 ; CHECK-NEXT:    [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store atomic i32 0, i32* [[EL]] unordered, align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
-; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;
 
@@ -336,7 +336,7 @@ define void @promote_latch_condition_decrementing_loop_04(i32* %p, i32* %a, i1 %
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ]
 ; CHECK-NEXT:    [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store atomic i32 0, i32* [[EL]] unordered, align 4
-; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:    br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;