[llvm] 0a357ad - [SCEV] Support non-constant step in howFarToZero (#94411)

Wed Jun 5 08:05:12 PDT 2024

Author: Philip Reames
Date: 2024-06-05T08:05:07-07:00
New Revision: 0a357adc75aa6f9d6b9fe815a5da937128cfee56

URL: https://github.com/llvm/llvm-project/commit/0a357adc75aa6f9d6b9fe815a5da937128cfee56
DIFF: https://github.com/llvm/llvm-project/commit/0a357adc75aa6f9d6b9fe815a5da937128cfee56.diff

LOG: [SCEV] Support non-constant step in howFarToZero (#94411)

VF * vscale is the canonical step for a scalably vectorized loop, and
LFTR canonicalizes to NE loop tests, so having our trip count logic be
unable to compute trip counts for such loops is unfortunate.

The existing code needed minimal generalization to handle non-constant
strides. The tricky cases to be sure we handle correctly are: zero, and
-1 (due to the special case of abs(-1) being non-positive).

This patch does the full generalization in terms of code structure, but
in practice, this seems unlikely to benefit
anything beyond the (C * vscale) case. I did some quick investigation,
and it seems the context free non-zero, and sign checks are basically
never disproved for arbitrary scales. I think we have alternate tactics
available for these, but I'm going to return to that in a separate
patch.

Added: 
    

Modified: 
    llvm/lib/Analysis/ScalarEvolution.cpp
    llvm/test/Analysis/ScalarEvolution/scalable-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 3b9aa9ab623f8..9808308cbfed9 100644

--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -10483,15 +10483,9 @@ ScalarEvolution::ExitLimit ScalarEvolution::howFarToZero(const SCEV *V,
   // Get the initial value for the loop.
   const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
   const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
-
-  // For now we handle only constant steps.
-  //
-  // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
-  // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
-  // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
-  // We have not yet seen any such cases.
   const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
-  if (!StepC || StepC->getValue()->isZero())
+
+  if (!isLoopInvariant(Step, L) || !isKnownNonZero(Step))
     return getCouldNotCompute();
 
   // For positive steps (counting up until unsigned overflow):
@@ -10499,13 +10493,16 @@ ScalarEvolution::ExitLimit ScalarEvolution::howFarToZero(const SCEV *V,
   // For negative steps (counting down to zero):
   //   N = Start/-Step
   // First compute the unsigned distance from zero in the direction of Step.
-  bool CountDown = StepC->getAPInt().isNegative();
-  const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
+  bool CountDown = isKnownNegative(Step);
+  if (!CountDown && !isKnownNonNegative(Step))
+    return getCouldNotCompute();
 
+  const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);
   // Handle unitary steps, which cannot wraparound.
   // 1*N = -Start; -1*N = Start (mod 2^BW), so:
   //   N = Distance (as unsigned)
-  if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) {
+  if (StepC &&
+      (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne())) {
     APInt MaxBECount = getUnsignedRangeMax(applyLoopGuards(Distance, L));
     MaxBECount = APIntOps::umin(MaxBECount, getUnsignedRangeMax(Distance));
 
@@ -10550,6 +10547,8 @@ ScalarEvolution::ExitLimit ScalarEvolution::howFarToZero(const SCEV *V,
   }
 
   // Solve the general equation.
+  if (!StepC)
+    return getCouldNotCompute();
   const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(),
                                                getNegativeSCEV(Start), *this);
 

diff  --git a/llvm/test/Analysis/ScalarEvolution/scalable-vector.ll b/llvm/test/Analysis/ScalarEvolution/scalable-vector.ll
index 81434d1bf064c..0a3ec4d66301b 100644
--- a/llvm/test/Analysis/ScalarEvolution/scalable-vector.ll
+++ b/llvm/test/Analysis/ScalarEvolution/scalable-vector.ll
@@ -91,13 +91,14 @@ define void @vscale_step_ne_tripcount(i64 %N) vscale_range(2, 1024) {
 ; CHECK-NEXT:    %n.vec = sub i64 %n.rnd.up, %n.mod.vf
 ; CHECK-NEXT:    --> (4 * vscale * ((-1 + (4 * vscale)<nuw><nsw> + %N) /u (4 * vscale)<nuw><nsw>)) U: [0,-3) S: [-9223372036854775808,9223372036854775805)
 ; CHECK-NEXT:    %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK-NEXT:    --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><%vector.body> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: <<Unknown>> LoopDispositions: { %vector.body: Computable }
+; CHECK-NEXT:    --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><%vector.body> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (4 * vscale * ((-1 * vscale * (4 + (-4 * ((-1 + (4 * vscale)<nuw><nsw> + %N) /u (4 * vscale)<nuw><nsw>))<nsw>)<nsw>) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %vector.body: Computable }
 ; CHECK-NEXT:    %index.next = add nuw i64 %index, %2
-; CHECK-NEXT:    --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><%vector.body> U: [8,-3) S: [-9223372036854775808,9223372036854775805) Exits: <<Unknown>> LoopDispositions: { %vector.body: Computable }
+; CHECK-NEXT:    --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><%vector.body> U: [8,-3) S: [-9223372036854775808,9223372036854775805) Exits: (vscale * (4 + (4 * ((-1 * vscale * (4 + (-4 * ((-1 + (4 * vscale)<nuw><nsw> + %N) /u (4 * vscale)<nuw><nsw>))<nsw>)<nsw>) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %vector.body: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @vscale_step_ne_tripcount
-; CHECK-NEXT:  Loop %vector.body: Unpredictable backedge-taken count.
-; CHECK-NEXT:  Loop %vector.body: Unpredictable constant max backedge-taken count.
-; CHECK-NEXT:  Loop %vector.body: Unpredictable symbolic max backedge-taken count.
+; CHECK-NEXT:  Loop %vector.body: backedge-taken count is ((-1 * vscale * (4 + (-4 * ((-1 + (4 * vscale)<nuw><nsw> + %N) /u (4 * vscale)<nuw><nsw>))<nsw>)<nsw>) /u (4 * vscale)<nuw><nsw>)
+; CHECK-NEXT:  Loop %vector.body: constant max backedge-taken count is i64 2305843009213693951
+; CHECK-NEXT:  Loop %vector.body: symbolic max backedge-taken count is ((-1 * vscale * (4 + (-4 * ((-1 + (4 * vscale)<nuw><nsw> + %N) /u (4 * vscale)<nuw><nsw>))<nsw>)<nsw>) /u (4 * vscale)<nuw><nsw>)
+; CHECK-NEXT:  Loop %vector.body: Trip multiple is 1
 ;
 entry:
   %0 = sub i64 -1, %N