[llvm] 602916d - [IndVars] Apply more optimistic SkipLastIter for AND/OR conditions

Mon Jan 23 21:45:21 PST 2023

Author: Max Kazantsev
Date: 2023-01-24T12:26:22+07:00
New Revision: 602916d2defa95cf4bec76e458be4375dbe70e4a

URL: https://github.com/llvm/llvm-project/commit/602916d2defa95cf4bec76e458be4375dbe70e4a
DIFF: https://github.com/llvm/llvm-project/commit/602916d2defa95cf4bec76e458be4375dbe70e4a.diff

LOG: [IndVars] Apply more optimistic SkipLastIter for AND/OR conditions

When exit by condition `C1` dominates exit by condition `C2`, and
max symbolic exit count for `C1` matches those for loop, we will
apply more optimistic logic to `C2` by setting `SkipLastIter` for it,
meaning that it will do 1 iteration less because the dominating branch
must exit on the last loop iteration.

But when we have a single exit by condition `C1 & C2`, we cannot
apply the same logic, because there is no dominating condition.

However, if we can prove that the symbolic max exit count of `C1 & C2`
matches those of `C1`, it means that for `C2` we can assume that it
doesn't matter on the last iteration (because the whole thing is `false`
because `C1` must be `false`). Therefore, in this situation, we can handle
`C2` as if it had `SkipLastIter`.

Differential Revision: https://reviews.llvm.org/D139934
Reviewed By: nikic

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
    llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll
    llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index afc78e2255929..6e1aabae06d36 100644

--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1465,11 +1465,47 @@ static bool optimizeLoopExitWithUnknownExitCount(
           LeafConditions.push_back(ICmp);
   } while (!Worklist.empty());
 
+  // If the current basic block has the same exit count as the whole loop, and
+  // it consists of multiple icmp's, try to collect all icmp's that give exact
+  // same exit count. For all other icmp's, we could use one less iteration,
+  // because their value on the last iteration doesn't really matter.
+  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
+  if (!SkipLastIter && LeafConditions.size() > 1 &&
+      SE->getExitCount(L, ExitingBB,
+                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
+          MaxIter)
+    for (auto *ICmp : LeafConditions) {
+      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
+                                             /*ControlsExit*/ false);
+      auto *ExitMax = EL.SymbolicMaxNotTaken;
+      if (isa<SCEVCouldNotCompute>(ExitMax))
+        continue;
+      // They could be of 
diff erent types (specifically this happens after
+      // IV widening).
+      auto *WiderType =
+          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
+      auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
+      auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
+      if (WideExitMax == WideMaxIter)
+        ICmpsFailingOnLastIter.insert(ICmp);
+    }
+
   bool Changed = false;
-  for (auto *OldCond : LeafConditions)
+  for (auto *OldCond : LeafConditions) {
+    // Skip last iteration for this icmp under one of two conditions:
+    // - We do it for all conditions;
+    // - There is another ICmp that would fail on last iter, so this one doesn't
+    // really matter.
+    bool OptimisticSkipLastIter = SkipLastIter;
+    if (!OptimisticSkipLastIter) {
+      if (ICmpsFailingOnLastIter.size() > 1)
+        OptimisticSkipLastIter = true;
+      else if (ICmpsFailingOnLastIter.size() == 1)
+        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
+    }
     if (auto Replaced =
             createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
-                              SkipLastIter, SE, Rewriter)) {
+                              OptimisticSkipLastIter, SE, Rewriter)) {
       Changed = true;
       auto *NewCond = *Replaced;
       if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
@@ -1481,7 +1517,11 @@ static bool optimizeLoopExitWithUnknownExitCount(
       assert(OldCond->hasOneUse() && "Must be!");
       OldCond->replaceAllUsesWith(NewCond);
       DeadInsts.push_back(OldCond);
+      // Make sure we no longer consider this condition as failing on last
+      // iteration.
+      ICmpsFailingOnLastIter.erase(OldCond);
     }
+  }
   return Changed;
 }
 

diff  --git a/llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll b/llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll
index fad21e9938c19..ba8f4f0448983 100644
--- a/llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll
+++ b/llvm/test/Transforms/IndVarSimplify/X86/widening-vs-and-elimination.ll
@@ -9,15 +9,15 @@ define void @test_01(i32 %start, i32 %limit) {
 ; WIDENING_ON-LABEL: @test_01(
 ; WIDENING_ON-NEXT:  bb:
 ; WIDENING_ON-NEXT:    [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
+; WIDENING_ON-NEXT:    [[TMP1:%.*]] = add nsw i64 [[TMP0]], -1
+; WIDENING_ON-NEXT:    [[TMP2:%.*]] = zext i32 [[LIMIT:%.*]] to i64
 ; WIDENING_ON-NEXT:    br label [[LOOP:%.*]]
 ; WIDENING_ON:       loop:
 ; WIDENING_ON-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[BB:%.*]] ]
-; WIDENING_ON-NEXT:    [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1
 ; WIDENING_ON-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
 ; WIDENING_ON-NEXT:    [[NOT_ZERO:%.*]] = icmp ne i64 [[INDVARS_IV]], 0
-; WIDENING_ON-NEXT:    [[TMP2:%.*]] = zext i32 [[LIMIT:%.*]] to i64
-; WIDENING_ON-NEXT:    [[RANGE_CHECK_WIDE:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]]
-; WIDENING_ON-NEXT:    [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_WIDE]]
+; WIDENING_ON-NEXT:    [[RANGE_CHECK_WIDE_FIRST_ITER:%.*]] = icmp ult i64 [[TMP1]], [[TMP2]]
+; WIDENING_ON-NEXT:    [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_WIDE_FIRST_ITER]]
 ; WIDENING_ON-NEXT:    br i1 [[AND]], label [[BACKEDGE]], label [[EXIT:%.*]]
 ; WIDENING_ON:       backedge:
 ; WIDENING_ON-NEXT:    br label [[LOOP]]
@@ -26,13 +26,14 @@ define void @test_01(i32 %start, i32 %limit) {
 ;
 ; WIDENING_OFF-LABEL: @test_01(
 ; WIDENING_OFF-NEXT:  bb:
+; WIDENING_OFF-NEXT:    [[TMP0:%.*]] = add i32 [[START:%.*]], -1
 ; WIDENING_OFF-NEXT:    br label [[LOOP:%.*]]
 ; WIDENING_OFF:       loop:
-; WIDENING_OFF-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[START:%.*]], [[BB:%.*]] ]
+; WIDENING_OFF-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[START]], [[BB:%.*]] ]
 ; WIDENING_OFF-NEXT:    [[IV_NEXT]] = add i32 [[IV]], -1
 ; WIDENING_OFF-NEXT:    [[NOT_ZERO:%.*]] = icmp ne i32 [[IV]], 0
-; WIDENING_OFF-NEXT:    [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_NEXT]], [[LIMIT:%.*]]
-; WIDENING_OFF-NEXT:    [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK]]
+; WIDENING_OFF-NEXT:    [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LIMIT:%.*]]
+; WIDENING_OFF-NEXT:    [[AND:%.*]] = and i1 [[NOT_ZERO]], [[RANGE_CHECK_FIRST_ITER]]
 ; WIDENING_OFF-NEXT:    br i1 [[AND]], label [[BACKEDGE]], label [[EXIT:%.*]]
 ; WIDENING_OFF:       backedge:
 ; WIDENING_OFF-NEXT:    [[ZEXT:%.*]] = zext i32 [[IV_NEXT]] to i64

diff  --git a/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll b/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll
index d6ef997519679..809468f8d4e91 100644
--- a/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll
+++ b/llvm/test/Transforms/IndVarSimplify/turn-to-invariant.ll
@@ -536,20 +536,20 @@ failed_2:
   ret i32 -2
 }
 
-; TODO: This test is equivalent to @test_simple_case, with only 
diff erence
-;       that both checks are merged together into one 'and' check. This
-;       should not prevent turning the range check into invariant.
-;       https://alive2.llvm.org/ce/z/G-2ERB
+; This test is equivalent to @test_simple_case, with only 
diff erence
+; that both checks are merged together into one 'and' check. This
+; should not prevent turning the range check into invariant.
+; https://alive2.llvm.org/ce/z/G-2ERB
 define i32 @test_and_conditions(i32 %start, i32 %len) {
 ; CHECK-LABEL: @test_and_conditions(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[START:%.*]], -1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
 ; CHECK-NEXT:    [[ZERO_CHECK:%.*]] = icmp ne i32 [[IV]], 0
-; CHECK-NEXT:    [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1
-; CHECK-NEXT:    [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_MINUS_1]], [[LEN:%.*]]
-; CHECK-NEXT:    [[BOTH_CHECKS:%.*]] = and i1 [[ZERO_CHECK]], [[RANGE_CHECK]]
+; CHECK-NEXT:    [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LEN:%.*]]
+; CHECK-NEXT:    [[BOTH_CHECKS:%.*]] = and i1 [[ZERO_CHECK]], [[RANGE_CHECK_FIRST_ITER]]
 ; CHECK-NEXT:    br i1 [[BOTH_CHECKS]], label [[BACKEDGE]], label [[FAILED:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], -1
@@ -584,18 +584,18 @@ failed:
   ret i32 -3
 }
 
-; TODO: Same as test_and_conditions, but swapped exit block branches,
-;       and condition is expressed as OR. Still optimizable.
+; Same as test_and_conditions, but swapped exit block branches,
+; and condition is expressed as OR. Still optimizable.
 define i32 @test_and_conditions_inverse(i32 %start, i32 %len) {
 ; CHECK-LABEL: @test_and_conditions_inverse(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[START:%.*]], -1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
 ; CHECK-NEXT:    [[ZERO_CHECK_FAILED:%.*]] = icmp eq i32 [[IV]], 0
-; CHECK-NEXT:    [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1
-; CHECK-NEXT:    [[RANGE_CHECK_FAILED:%.*]] = icmp uge i32 [[IV_MINUS_1]], [[LEN:%.*]]
-; CHECK-NEXT:    [[EITHER_CHECK:%.*]] = or i1 [[ZERO_CHECK_FAILED]], [[RANGE_CHECK_FAILED]]
+; CHECK-NEXT:    [[RANGE_CHECK_FAILED_FIRST_ITER:%.*]] = icmp uge i32 [[TMP0]], [[LEN:%.*]]
+; CHECK-NEXT:    [[EITHER_CHECK:%.*]] = or i1 [[ZERO_CHECK_FAILED]], [[RANGE_CHECK_FAILED_FIRST_ITER]]
 ; CHECK-NEXT:    br i1 [[EITHER_CHECK]], label [[FAILED:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], -1
@@ -752,17 +752,17 @@ failed_2:
   ret i32 -2
 }
 
-; TODO: Same as test_and_conditions, but with logical AND.
+; Same as test_and_conditions, but with logical AND.
 define i32 @test_and_conditions_logical_and(i32 %start, i32 %len) {
 ; CHECK-LABEL: @test_and_conditions_logical_and(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[START:%.*]], -1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
 ; CHECK-NEXT:    [[ZERO_CHECK:%.*]] = icmp ne i32 [[IV]], 0
-; CHECK-NEXT:    [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1
-; CHECK-NEXT:    [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_MINUS_1]], [[LEN:%.*]]
-; CHECK-NEXT:    [[BOTH_CHECKS:%.*]] = select i1 [[ZERO_CHECK]], i1 [[RANGE_CHECK]], i1 false
+; CHECK-NEXT:    [[RANGE_CHECK_FIRST_ITER:%.*]] = icmp ult i32 [[TMP0]], [[LEN:%.*]]
+; CHECK-NEXT:    [[BOTH_CHECKS:%.*]] = select i1 [[ZERO_CHECK]], i1 [[RANGE_CHECK_FIRST_ITER]], i1 false
 ; CHECK-NEXT:    br i1 [[BOTH_CHECKS]], label [[BACKEDGE]], label [[FAILED:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], -1
@@ -797,17 +797,17 @@ failed:
   ret i32 -3
 }
 
-; TODO: Same as test_and_conditions_inverse, but with logical OR.
+; Same as test_and_conditions_inverse, but with logical OR.
 define i32 @test_and_conditions_inverse_logical_or(i32 %start, i32 %len) {
 ; CHECK-LABEL: @test_and_conditions_inverse_logical_or(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[START:%.*]], -1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[START]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
 ; CHECK-NEXT:    [[ZERO_CHECK_FAILED:%.*]] = icmp eq i32 [[IV]], 0
-; CHECK-NEXT:    [[IV_MINUS_1:%.*]] = add i32 [[IV]], -1
-; CHECK-NEXT:    [[RANGE_CHECK_FAILED:%.*]] = icmp uge i32 [[IV_MINUS_1]], [[LEN:%.*]]
-; CHECK-NEXT:    [[EITHER_CHECK:%.*]] = select i1 [[ZERO_CHECK_FAILED]], i1 true, i1 [[RANGE_CHECK_FAILED]]
+; CHECK-NEXT:    [[RANGE_CHECK_FAILED_FIRST_ITER:%.*]] = icmp uge i32 [[TMP0]], [[LEN:%.*]]
+; CHECK-NEXT:    [[EITHER_CHECK:%.*]] = select i1 [[ZERO_CHECK_FAILED]], i1 true, i1 [[RANGE_CHECK_FAILED_FIRST_ITER]]
 ; CHECK-NEXT:    br i1 [[EITHER_CHECK]], label [[FAILED:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], -1