[llvm] [SCEV] Support ule/sle exit counts via widening (PR #92206)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Tue May 14 19:26:20 PDT 2024


https://github.com/nikic created https://github.com/llvm/llvm-project/pull/92206

If we have an exit condition of the form IV <= Limit, we will first try to convert it into IV < Limit+1 or IV-1 < Limit based on range info (in icmp simplification). If that fails, we try to convert it to IV < Limit + 1 based on controlling exits in non-infinite loops.

However, if all else fails, we can still determine the exit count by rewriting to ext(IV) < ext(Limit) + 1, where the zero/sign extension ensures that the addition does not overflow.

Proof: https://alive2.llvm.org/ce/z/iR-iYd

>From 3fa691d0f2104932547983059090a7c594822cc0 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Wed, 15 May 2024 10:41:06 +0900
Subject: [PATCH] [SCEV] Support ule/sle exit counts via widening

If we have an exit condition of the form IV <= Limit, we will first
try to convert it into IV < Limit+1 or IV-1 < Limit based on range
info (in icmp simplification). If that fails, we try to convert it
to IV < Limit + 1 based on controlling exits in non-infinite loops.

However, if all else fails, we can still determine the exit count
by rewriting to ext(IV) < ext(Limit) + 1, where the zero/sign
extension ensures that the addition does not overflow.

Proof: https://alive2.llvm.org/ce/z/iR-iYd
---
 llvm/lib/Analysis/ScalarEvolution.cpp         | 17 ++++++-
 .../ScalarEvolution/exit-count-non-strict.ll  | 50 +++++++++++--------
 llvm/test/CodeGen/PowerPC/ctrloop-le.ll       | 15 ++----
 .../IndVarSimplify/AArch64/widen-loop-comp.ll | 26 +++++-----
 4 files changed, 64 insertions(+), 44 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 254d79183a1e9..2e3f5a0aa7724 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9198,8 +9198,21 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(
     // Since the loop is finite, an invariant RHS cannot include the boundary
     // value, otherwise it would loop forever.
     if (!EnableFiniteLoopControl || !ControllingFiniteLoop ||
-        !isLoopInvariant(RHS, L))
-      break;
+        !isLoopInvariant(RHS, L)) {
+      // Otherwise, perform the addition in a wider type, to avoid overflow.
+      auto *OldType = dyn_cast<IntegerType>(LHS->getType());
+      if (!OldType)
+        break;
+      auto *NewType =
+          Type::getIntNTy(OldType->getContext(), OldType->getBitWidth() * 2);
+      if (ICmpInst::isSigned(Pred)) {
+        LHS = getSignExtendExpr(LHS, NewType);
+        RHS = getSignExtendExpr(RHS, NewType);
+      } else {
+        LHS = getZeroExtendExpr(LHS, NewType);
+        RHS = getZeroExtendExpr(RHS, NewType);
+      }
+    }
     RHS = getAddExpr(getOne(RHS->getType()), RHS);
     [[fallthrough]];
   case ICmpInst::ICMP_SLT:
diff --git a/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll b/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
index 2117c779f4b37..e9faf98eee449 100644
--- a/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
+++ b/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
@@ -4,13 +4,14 @@
 define void @ule_from_zero(i32 %M, i32 %N) {
 ; CHECK-LABEL: 'ule_from_zero'
 ; CHECK-NEXT:  Determining loop execution counts for: @ule_from_zero
-; CHECK-NEXT:  Loop %loop: <multiple exits> Unpredictable backedge-taken count.
-; CHECK-NEXT:    exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT:  Loop %loop: <multiple exits> backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
+; CHECK-NEXT:    exit count for loop: (1 + (zext i32 %M to i64))<nuw><nsw>
 ; CHECK-NEXT:    exit count for latch: %N
-; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i32 -1
-; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is %N
-; CHECK-NEXT:    symbolic max exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 4294967295
+; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
+; CHECK-NEXT:    symbolic max exit count for loop: (1 + (zext i32 %M to i64))<nuw><nsw>
 ; CHECK-NEXT:    symbolic max exit count for latch: %N
+; CHECK-NEXT:  Loop %loop: Trip multiple is 1
 ;
 entry:
   br label %loop
@@ -61,13 +62,14 @@ exit:
 define void @ule_from_unknown(i32 %M, i32 %N, i32 %S) {
 ; CHECK-LABEL: 'ule_from_unknown'
 ; CHECK-NEXT:  Determining loop execution counts for: @ule_from_unknown
-; CHECK-NEXT:  Loop %loop: <multiple exits> Unpredictable backedge-taken count.
-; CHECK-NEXT:    exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT:  Loop %loop: <multiple exits> backedge-taken count is (((-1 * (zext i32 %S to i64))<nsw> + ((zext i32 %S to i64) umax (1 + (zext i32 %M to i64))<nuw><nsw>)) umin_seq (zext i32 ((-1 * %S) + %N) to i64))
+; CHECK-NEXT:    exit count for loop: ((-1 * (zext i32 %S to i64))<nsw> + ((zext i32 %S to i64) umax (1 + (zext i32 %M to i64))<nuw><nsw>))
 ; CHECK-NEXT:    exit count for latch: ((-1 * %S) + %N)
-; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i32 -1
-; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is ((-1 * %S) + %N)
-; CHECK-NEXT:    symbolic max exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 4294967295
+; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is (((-1 * (zext i32 %S to i64))<nsw> + ((zext i32 %S to i64) umax (1 + (zext i32 %M to i64))<nuw><nsw>)) umin_seq (zext i32 ((-1 * %S) + %N) to i64))
+; CHECK-NEXT:    symbolic max exit count for loop: ((-1 * (zext i32 %S to i64))<nsw> + ((zext i32 %S to i64) umax (1 + (zext i32 %M to i64))<nuw><nsw>))
 ; CHECK-NEXT:    symbolic max exit count for latch: ((-1 * %S) + %N)
+; CHECK-NEXT:  Loop %loop: Trip multiple is 1
 ;
 entry:
   br label %loop
@@ -96,6 +98,9 @@ define void @ule_from_zero_no_nuw(i32 %M, i32 %N) {
 ; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is %N
 ; CHECK-NEXT:    symbolic max exit count for loop: ***COULDNOTCOMPUTE***
 ; CHECK-NEXT:    symbolic max exit count for latch: %N
+; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
+; CHECK-NEXT:   Predicates:
+; CHECK-NEXT:      {0,+,1}<%loop> Added Flags: <nusw>
 ;
 entry:
   br label %loop
@@ -117,13 +122,14 @@ exit:
 define void @sle_from_int_min(i32 %M, i32 %N) {
 ; CHECK-LABEL: 'sle_from_int_min'
 ; CHECK-NEXT:  Determining loop execution counts for: @sle_from_int_min
-; CHECK-NEXT:  Loop %loop: <multiple exits> Unpredictable backedge-taken count.
-; CHECK-NEXT:    exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT:  Loop %loop: <multiple exits> backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
+; CHECK-NEXT:    exit count for loop: (2147483649 + (sext i32 %M to i64))<nsw>
 ; CHECK-NEXT:    exit count for latch: (-2147483648 + %N)
-; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i32 -1
-; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is (-2147483648 + %N)
-; CHECK-NEXT:    symbolic max exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 4294967295
+; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
+; CHECK-NEXT:    symbolic max exit count for loop: (2147483649 + (sext i32 %M to i64))<nsw>
 ; CHECK-NEXT:    symbolic max exit count for latch: (-2147483648 + %N)
+; CHECK-NEXT:  Loop %loop: Trip multiple is 1
 ;
 entry:
   br label %loop
@@ -174,13 +180,14 @@ exit:
 define void @sle_from_unknown(i32 %M, i32 %N, i32 %S) {
 ; CHECK-LABEL: 'sle_from_unknown'
 ; CHECK-NEXT:  Determining loop execution counts for: @sle_from_unknown
-; CHECK-NEXT:  Loop %loop: <multiple exits> Unpredictable backedge-taken count.
-; CHECK-NEXT:    exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT:  Loop %loop: <multiple exits> backedge-taken count is (((-1 * (sext i32 %S to i64))<nsw> + ((sext i32 %S to i64) smax (1 + (sext i32 %M to i64))<nsw>)) umin_seq (zext i32 ((-1 * %S) + %N) to i64))
+; CHECK-NEXT:    exit count for loop: ((-1 * (sext i32 %S to i64))<nsw> + ((sext i32 %S to i64) smax (1 + (sext i32 %M to i64))<nsw>))
 ; CHECK-NEXT:    exit count for latch: ((-1 * %S) + %N)
-; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i32 -1
-; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is ((-1 * %S) + %N)
-; CHECK-NEXT:    symbolic max exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is i64 4294967295
+; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is (((-1 * (sext i32 %S to i64))<nsw> + ((sext i32 %S to i64) smax (1 + (sext i32 %M to i64))<nsw>)) umin_seq (zext i32 ((-1 * %S) + %N) to i64))
+; CHECK-NEXT:    symbolic max exit count for loop: ((-1 * (sext i32 %S to i64))<nsw> + ((sext i32 %S to i64) smax (1 + (sext i32 %M to i64))<nsw>))
 ; CHECK-NEXT:    symbolic max exit count for latch: ((-1 * %S) + %N)
+; CHECK-NEXT:  Loop %loop: Trip multiple is 1
 ;
 entry:
   br label %loop
@@ -209,6 +216,9 @@ define void @sle_from_int_min_no_nsw(i32 %M, i32 %N) {
 ; CHECK-NEXT:  Loop %loop: symbolic max backedge-taken count is (-2147483648 + %N)
 ; CHECK-NEXT:    symbolic max exit count for loop: ***COULDNOTCOMPUTE***
 ; CHECK-NEXT:    symbolic max exit count for latch: (-2147483648 + %N)
+; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
+; CHECK-NEXT:   Predicates:
+; CHECK-NEXT:      {-2147483648,+,1}<%loop> Added Flags: <nssw>
 ;
 entry:
   br label %loop
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-le.ll b/llvm/test/CodeGen/PowerPC/ctrloop-le.ll
index 599e540e898a7..08ecd8970d836 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-le.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-le.ll
@@ -293,8 +293,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 
 ; CHECK: test_pos1_rr_sle
-; FIXME: Support this loop!
-; CHECK-NOT: bdnz
+; CHECK: bdnz
 ; a < b
 define void @test_pos1_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
 entry:
@@ -323,8 +322,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 
 ; CHECK: test_pos2_rr_sle
-; FIXME: Support this loop!
-; CHECK-NOT: bdnz
+; CHECK: bdnz
 ; a < b
 define void @test_pos2_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
 entry:
@@ -353,8 +351,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 
 ; CHECK: test_pos4_rr_sle
-; FIXME: Support this loop!
-; CHECK-NOT: bdnz
+; CHECK: bdnz
 ; a < b
 define void @test_pos4_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
 entry:
@@ -383,8 +380,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 
 ; CHECK: test_pos8_rr_sle
-; FIXME: Support this loop!
-; CHECK-NOT: bdnz
+; CHECK: bdnz
 ; a < b
 define void @test_pos8_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
 entry:
@@ -413,8 +409,7 @@ for.end:                                          ; preds = %for.body, %entry
 
 
 ; CHECK: test_pos16_rr_sle
-; FIXME: Support this loop!
-; CHECK-NOT: bdnz
+; CHECK: bdnz
 ; a < b
 define void @test_pos16_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
 entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll
index c5f656c870a23..99541b398226a 100644
--- a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll
@@ -265,16 +265,17 @@ define i32 @test5(ptr %a, i32 %b) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[B:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[SUM_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ule i64 [[INDVARS_IV]], [[TMP0]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ADD]] = add nsw i32 [[SUM_0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD]] = add nsw i32 [[SUM_0]], [[TMP2]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ; CHECK:       for.end:
@@ -349,22 +350,23 @@ define i32 @test7(ptr %a, i32 %b) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[B:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[B]], i32 -1)
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[SMAX]], 2
-; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[SMAX]], 2
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP2]] to i64
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[SUM_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ule i64 [[INDVARS_IV]], [[TMP0]]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ADD]] = add nsw i32 [[SUM_0]], [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD]] = add nsw i32 [[SUM_0]], [[TMP3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND]], label [[FOR_END]]
+; CHECK-NEXT:    [[EXITCOND2:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND2]], label [[FOR_COND]], label [[FOR_END]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0]], [[FOR_BODY]] ], [ [[SUM_0]], [[FOR_COND]] ]
 ; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]



More information about the llvm-commits mailing list