[llvm] 74c8c2d - Revert "Recommit "[SCEV] Use nw flag and symbolic iteration count to sharpen ranges of AddRecs""

Nikita Popov via llvm-commits <llvm-commits at lists.llvm.org>
Fri Oct 16 12:36:46 PDT 2020


Author: Nikita Popov
Date: 2020-10-16T21:36:33+02:00
New Revision: 74c8c2d903e2c849a2132ae798018f8bbe00ea21

URL: https://github.com/llvm/llvm-project/commit/74c8c2d903e2c849a2132ae798018f8bbe00ea21
DIFF: https://github.com/llvm/llvm-project/commit/74c8c2d903e2c849a2132ae798018f8bbe00ea21.diff

LOG: Revert "Recommit "[SCEV] Use nw flag and symbolic iteration count to sharpen ranges of AddRecs""

This reverts commit 32b72c3165bf65cca2e8e6197b59eb4c4b60392a.

While better than before, this change still introduces a large
compile-time regression (>3% on mafft):
https://llvm-compile-time-tracker.com/compare.php?from=fbd62fe60fb2281ca33da35dc25ca3c87ec0bb51&to=32b72c3165bf65cca2e8e6197b59eb4c4b60392a&stat=instructions

Additionally, the logic here doesn't look quite right to me;
I will comment in more detail on the differential revision.

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/ScalarEvolution.h
    llvm/lib/Analysis/ScalarEvolution.cpp
    llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
    llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll
    llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 2b905e4510bd..4581df6428af 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1489,13 +1489,6 @@ class ScalarEvolution {
   ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop,
                                     const SCEV *MaxBECount, unsigned BitWidth);
 
-  /// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p
-  /// Start,+,\p Stop}<nw>.
-  ConstantRange getRangeForAffineNoSelfWrappingAR(const SCEVAddRecExpr *AddRec,
-                                                  const SCEV *MaxBECount,
-                                                  unsigned BitWidth,
-                                                  RangeSignHint SignHint);
-
   /// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p
   /// Stop} by "factoring out" a ternary expression from the add recurrence.
   /// Helper called by \c getRange.

diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 68d736020300..9d5a842058d9 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -5509,17 +5509,6 @@ ScalarEvolution::getRangeRef(const SCEV *S,
         ConservativeResult =
             ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
       }
-
-      // Now try symbolic BE count and more powerful methods.
-      MaxBECount = computeMaxBackedgeTakenCount(AddRec->getLoop());
-      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
-          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
-          AddRec->hasNoSelfWrap()) {
-        auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR(
-            AddRec, MaxBECount, BitWidth, SignHint);
-        ConservativeResult =
-            ConservativeResult.intersectWith(RangeFromAffineNew, RangeType);
-      }
     }
 
     return setRange(AddRec, SignHint, std::move(ConservativeResult));
@@ -5689,67 +5678,6 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
   return SR.intersectWith(UR, ConstantRange::Smallest);
 }
 
-ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
-    const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth,
-    ScalarEvolution::RangeSignHint SignHint) {
-  assert(AddRec->isAffine() && "Non-affine AddRecs are not suppored!\n");
-  assert(AddRec->hasNoSelfWrap() &&
-         "This only works for non-self-wrapping AddRecs!");
-  const bool IsSigned = SignHint == HINT_RANGE_SIGNED;
-  const SCEV *Step = AddRec->getStepRecurrence(*this);
-  // Let's make sure that we can prove that we do not self-wrap during
-  // MaxBECount iterations. We need this because MaxBECount is a maximum
-  // iteration count estimate, and we might infer nw from some exit for which we
-  // do not know max exit count (or any other side reasoning).
-  // TODO: Turn into assert at some point.
-  MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType());
-  const SCEV *RangeWidth = getNegativeSCEV(getOne(AddRec->getType()));
-  const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step));
-  const SCEV *MaxItersWithoutWrap = getUDivExpr(RangeWidth, StepAbs);
-  if (!isKnownPredicate(ICmpInst::ICMP_ULE, MaxBECount, MaxItersWithoutWrap))
-    return ConstantRange::getFull(BitWidth);
-
-  ICmpInst::Predicate LEPred =
-      IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
-  ICmpInst::Predicate GEPred =
-      IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
-  const SCEV *Start = AddRec->getStart();
-  const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
-  // We could handle non-constant End, but it harms compile time a lot.
-  if (!isa<SCEVConstant>(End))
-    return ConstantRange::getFull(BitWidth);
-
-  // We know that there is no self-wrap. Let's take Start and End values and
-  // look at all intermediate values V1, V2, ..., Vn that IndVar takes during
-  // the iteration. They either lie inside the range [Min(Start, End),
-  // Max(Start, End)] or outside it:
-  //
-  // Case 1:   RangeMin    ...    Start V1 ... VN End ...           RangeMax;
-  // Case 2:   RangeMin Vk ... V1 Start    ...    End Vn ... Vk + 1 RangeMax;
-  //
-  // No self wrap flag guarantees that the intermediate values cannot be BOTH
-  // outside and inside the range [Min(Start, End), Max(Start, End)]. Using that
-  // knowledge, let's try to prove that we are dealing with Case 1. It is so if
-  // Start <= End and step is positive, or Start >= End and step is negative.
-  ConstantRange StartRange =
-      IsSigned ? getSignedRange(Start) : getUnsignedRange(Start);
-  ConstantRange EndRange =
-      IsSigned ? getSignedRange(End) : getUnsignedRange(End);
-  ConstantRange RangeBetween = StartRange.unionWith(EndRange);
-  // If they already cover full iteration space, we will know nothing useful
-  // even if we prove what we want to prove.
-  if (RangeBetween.isFullSet())
-    return RangeBetween;
-
-  if (isKnownPositive(Step) &&
-      isKnownViaNonRecursiveReasoning(LEPred, Start, End))
-    return RangeBetween;
-  else if (isKnownNegative(Step) &&
-           isKnownViaNonRecursiveReasoning(GEPred, Start, End))
-    return RangeBetween;
-  return ConstantRange::getFull(BitWidth);
-}
-
 ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
                                                     const SCEV *Step,
                                                     const SCEV *MaxBECount,
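
To make the removed case analysis easier to follow, here is a minimal
standalone sketch of the same idea. It is not the ScalarEvolution/ConstantRange
API: the helper name sharpenRangeNoSelfWrap and the use of 8-bit arithmetic are
invented for illustration, and only the unsigned variant of the SignHint logic
is modelled.

#include <cstdint>
#include <cstdio>
#include <optional>
#include <utility>

// Given an affine recurrence {Start,+,Step} over i8 that is known not to
// self-wrap, and an upper bound MaxBECount on the backedge-taken count, try to
// return unsigned [Min, Max] bounds covering every value the recurrence takes
// in iterations 0..MaxBECount.
static std::optional<std::pair<uint8_t, uint8_t>>
sharpenRangeNoSelfWrap(uint8_t Start, int8_t Step, uint8_t MaxBECount) {
  if (Step == 0)
    return std::make_pair(Start, Start);

  // Guard: MaxBECount steps must fit without wrapping the 8-bit space, i.e.
  // MaxBECount <= (2^8 - 1) / |Step|. This mirrors the removed
  // isKnownPredicate(ICMP_ULE, MaxBECount, RangeWidth / StepAbs) check.
  unsigned StepAbs = Step < 0 ? unsigned(-int(Step)) : unsigned(Step);
  if (MaxBECount > 255u / StepAbs)
    return std::nullopt; // cannot rule out self-wrap within MaxBECount steps

  // End is the value after MaxBECount steps (modular arithmetic).
  uint8_t End = uint8_t(Start + uint8_t(int(Step) * int(MaxBECount)));

  // Case 1 from the removed comment: a positive step with Start <= End means
  // the values walk from Start up to End without wrapping, so every
  // intermediate value stays inside [Start, End]; symmetrically for a
  // negative step with Start >= End.
  if (Step > 0 && Start <= End)
    return std::make_pair(Start, End);
  if (Step < 0 && Start >= End)
    return std::make_pair(End, Start);

  return std::nullopt; // Case 2 (values wrap around the ends): no sharpening
}

int main() {
  // {10,+,3} with at most 20 iterations stays within [10, 70].
  if (auto R = sharpenRangeNoSelfWrap(10, 3, 20))
    std::printf("[%u, %u]\n", unsigned(R->first), unsigned(R->second));
}

Note that the real implementation also gave up when End was not a SCEVConstant
precisely to limit compile time, which is the cost the revert message above
refers to.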

diff  --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
index bac6deeab202..f9cb89c97161 100644
--- a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
+++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
@@ -7,7 +7,7 @@ define i32 @test_01(i32 %start, i32* %p, i32* %q) {
 ; CHECK-NEXT:    %0 = zext i32 %start to i64
 ; CHECK-NEXT:    --> (zext i32 %start to i64) U: [0,4294967296) S: [0,4294967296)
 ; CHECK-NEXT:    %indvars.iv = phi i64 [ %indvars.iv.next, %backedge ], [ %0, %entry ]
-; CHECK-NEXT:    --> {(zext i32 %start to i64),+,-1}<nsw><%loop> U: [0,4294967296) S: [0,4294967296) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(zext i32 %start to i64),+,-1}<nsw><%loop> U: [-4294967295,4294967296) S: [-4294967295,4294967296) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv = phi i32 [ %start, %entry ], [ %iv.next, %backedge ]
 ; CHECK-NEXT:    --> {%start,+,-1}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add i32 %iv, -1
@@ -21,7 +21,7 @@ define i32 @test_01(i32 %start, i32* %p, i32* %q) {
 ; CHECK-NEXT:    %stop = load i32, i32* %load.addr, align 4
 ; CHECK-NEXT:    --> %stop U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %indvars.iv.next = add nsw i64 %indvars.iv, -1
-; CHECK-NEXT:    --> {(-1 + (zext i32 %start to i64))<nsw>,+,-1}<nsw><%loop> U: [-4294967296,4294967295) S: [-1,4294967295) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(-1 + (zext i32 %start to i64))<nsw>,+,-1}<nsw><%loop> U: [-4294967296,4294967295) S: [-4294967296,4294967295) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_01
 ; CHECK-NEXT:  Loop %loop: <multiple exits> Unpredictable backedge-taken count.
 ; CHECK-NEXT:    exit count for loop: (zext i32 %start to i64)
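
As a sanity check on the constants above (my reading of the output, not
something the test asserts beyond the CHECK lines): %0 = zext i32 %start lies
in [0, 2^32), so the removed sharpening could conclude that the <nsw>
recurrence {%0,+,-1} never leaves [0, 2^32), while the fallback range has to
allow up to 2^32 - 1 decrements below %0's minimum:

#include <cstdio>

int main() {
  const long long Pow32 = 1LL << 32;         // 4294967296
  const long long MaxDecrements = Pow32 - 1; // conservative iteration bound
  std::printf("sharpened:    [0, %lld)\n", Pow32);
  std::printf("conservative: [%lld, %lld)\n", 0LL - MaxDecrements, Pow32);
}

The IndVarSimplify test updates further down appear to follow from the same
widening: with the less precise ranges, the nuw/nsw flags and the unsigned form
of the latch comparisons can presumably no longer be proven.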

diff  --git a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll
index 4738369c0aca..9fb7977c207f 100644
--- a/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll
+++ b/llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll
@@ -474,7 +474,7 @@ define void @test_10(i32 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 90
 ; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 90
-; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[UMIN]], -99
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[UMIN]], -99
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ -100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]

diff  --git a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
index 090efa44d9c0..5cc288c58e68 100644
--- a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
+++ b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
@@ -196,7 +196,7 @@ define void @promote_latch_condition_decrementing_loop_01(i32* %p, i32* %a) {
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ]
 ; CHECK-NEXT:    [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store atomic i32 0, i32* [[EL]] unordered, align 4
-; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:    br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;
@@ -241,7 +241,7 @@ define void @promote_latch_condition_decrementing_loop_02(i32* %p, i32* %a) {
 ; CHECK-NEXT:    [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store atomic i32 0, i32* [[EL]] unordered, align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
-; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;
 
@@ -285,7 +285,7 @@ define void @promote_latch_condition_decrementing_loop_03(i32* %p, i32* %a) {
 ; CHECK-NEXT:    [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store atomic i32 0, i32* [[EL]] unordered, align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
-; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;
 
@@ -336,7 +336,7 @@ define void @promote_latch_condition_decrementing_loop_04(i32* %p, i32* %a, i1 %
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ]
 ; CHECK-NEXT:    [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store atomic i32 0, i32* [[EL]] unordered, align 4
-; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:    br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;


        

