[llvm] 932ae48 - [IndVars] Improve handling of multi-exit loops with known symbolic counts

Mon Jan 23 21:58:43 PST 2023

Author: Max Kazantsev
Date: 2023-01-24T12:46:48+07:00
New Revision: 932ae48c273c590449649ed3cc7b752511a85665

URL: https://github.com/llvm/llvm-project/commit/932ae48c273c590449649ed3cc7b752511a85665
DIFF: https://github.com/llvm/llvm-project/commit/932ae48c273c590449649ed3cc7b752511a85665.diff

LOG: [IndVars] Improve handling of multi-exit loops with known symbolic counts

This patch does two things, both related to supporting multi-exit loops in
which many exits have a known symbolic max exit count. In theory the two parts
could land independently, but I don't know how to write a test that shows the
impact of each one separately.

Part 1: setting `SkipLastIter` to `true` no longer requires a particular exit
to have the same exit count as the whole loop (so that this exit must be taken
on the last iteration). It is now enough that the umin of the exit counts of
the first few exits is the same as the exit count of the whole loop. In that
case it is not known exactly which of these exits will be taken, but one of
them will.

Part 2: when `SkipLastIter` is set and the exit count is `umin(a, b, c)`, use
`umin(a - 1, b - 1, c - 1)` instead of `umin(a, b, c) - 1`. We don't care about
overflows here. The subsequent logic knows how to handle a umin element by
element, whereas a `SCEVAddExpr` node wrapped around the umin would confuse it.

Differential Revision: https://reviews.llvm.org/D141361
Reviewed By: nikic

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
    llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 6e1aabae06d3..c834e51b5f29 100644

--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1403,8 +1403,17 @@ createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
   }
 
   if (SkipLastIter) {
-    const SCEV *One = SE->getOne(MaxIter->getType());
-    MaxIter = SE->getMinusSCEV(MaxIter, One);
+    // Semantically skip last iter is "subtract 1, do not bother about unsigned
+    // wrap". getLoopInvariantExitCondDuringFirstIterations knows how to deal
+    // with umin in a smart way, but umin(a, b) - 1 will likely not simplify.
+    // So we manually construct umin(a - 1, b - 1).
+    SmallVector<const SCEV *, 4> Elements;
+    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
+      for (auto *Op : UMin->operands())
+        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
+      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
+    } else
+      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
   }
 
   // Check if there is a loop-invariant predicate equivalent to our check.
@@ -1724,6 +1733,19 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
 
   bool Changed = false;
   bool SkipLastIter = false;
+  const SCEV *CurrMaxExit = SE->getCouldNotCompute();
+  auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
+    if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
+      return;
+    if (isa<SCEVCouldNotCompute>(CurrMaxExit))
+      CurrMaxExit = MaxExitCount;
+    else
+      CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
+    // If the loop has more than 1 iteration, all further checks will be
+    // executed 1 iteration less.
+    if (CurrMaxExit == MaxBECount)
+      SkipLastIter = true;
+  };
   SmallSet<const SCEV *, 8> DominatingExactExitCounts;
   for (BasicBlock *ExitingBB : ExitingBlocks) {
     const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
@@ -1759,17 +1781,11 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
         Changed = true;
       else if (SkipLastIter && OptimizeCond(true))
         Changed = true;
-      if (MaxBECount == MaxExitCount)
-        // If the loop has more than 1 iteration, all further checks will be
-        // executed 1 iteration less.
-        SkipLastIter = true;
+      UpdateSkipLastIter(MaxExitCount);
       continue;
     }
 
-    if (MaxBECount == MaxExitCount)
-      // If the loop has more than 1 iteration, all further checks will be
-      // executed 1 iteration less.
-      SkipLastIter = true;
+    UpdateSkipLastIter(ExactExitCount);
 
     // If we know we'd exit on the first iteration, rewrite the exit to
     // reflect this.  This does not imply the loop must exit through this

diff  --git a/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll b/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll
index 809468f8d4e9..bd5b17ace2e9 100644
--- a/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll
+++ b/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll
@@ -842,13 +842,14 @@ failed:
   ret i32 -3
 }
 
-; TODO: Same as test_litter_conditions, but an extra check with known exact exit count is preventing the opt.
+; Same as test_litter_conditions, but an extra check with known exact exit count is preventing the opt.
 define i32 @test_litter_conditions_constant(i32 %start, i32 %len) {
 ; CHECK-LABEL: @test_litter_conditions_constant(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[START:%.*]], -1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
 ; CHECK-NEXT:    [[CANONICAL_IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[CANONICAL_IV_NEXT:%.*]], [[BACKEDGE]] ]
 ; CHECK-NEXT:    [[CONSTANT_CHECK:%.*]] = icmp ult i32 [[CANONICAL_IV]], 65635
 ; CHECK-NEXT:    br i1 [[CONSTANT_CHECK]], label [[CONSTANT_CHECK_PASSED:%.*]], label [[CONSTANT_CHECK_FAILED:%.*]]
@@ -858,10 +859,9 @@ define i32 @test_litter_conditions_constant(i32 %start, i32 %len) {
 ; CHECK-NEXT:    [[AND_1:%.*]] = and i1 [[ZERO_CHECK]], [[FAKE_1]]
 ; CHECK-NEXT:    br i1 [[AND_1]], label [[RANGE_CHECK_BLOCK:%.*]], label [[FAILED_1:%.*]]
 ; CHECK:       range_check_block:
-; CHECK-NEXT:    [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1
-; CHECK-NEXT:    [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_MINUS_1]], [[LEN:%.*]]
+; CHECK-NEXT:    [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LEN:%.*]]
 ; CHECK-NEXT:    [[FAKE_2:%.*]] = call i1 @cond()
-; CHECK-NEXT:    [[AND_2:%.*]] = and i1 [[RANGE_CHECK]], [[FAKE_2]]
+; CHECK-NEXT:    [[AND_2:%.*]] = and i1 [[RANGE_CHECK_FIRST_ITER]], [[FAKE_2]]
 ; CHECK-NEXT:    br i1 [[AND_2]], label [[BACKEDGE]], label [[FAILED_2:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], -1