[llvm] [SCEV] Support ule/sle exit counts via widening (PR #92206)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 14 19:26:48 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Nikita Popov (nikic)
<details>
<summary>Changes</summary>
If we have an exit condition of the form IV <= Limit, we will first try to convert it into IV < Limit + 1 or IV - 1 < Limit based on range info (in icmp simplification). If that fails, we try to convert it to IV < Limit + 1 based on controlling exits in non-infinite loops.
However, if all else fails, we can still determine the exit count by rewriting to ext(IV) < ext(Limit) + 1, where the zero/sign extension ensures that the addition does not overflow.
Proof: https://alive2.llvm.org/ce/z/iR-iYd
---
Full diff: https://github.com/llvm/llvm-project/pull/92206.diff
4 Files Affected:
- (modified) llvm/lib/Analysis/ScalarEvolution.cpp (+15-2)
- (modified) llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll (+30-20)
- (modified) llvm/test/CodeGen/PowerPC/ctrloop-le.ll (+5-10)
- (modified) llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll (+14-12)
``````````diff
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 254d79183a1e9..2e3f5a0aa7724 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9198,8 +9198,21 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(
// Since the loop is finite, an invariant RHS cannot include the boundary
// value, otherwise it would loop forever.
if (!EnableFiniteLoopControl || !ControllingFiniteLoop ||
- !isLoopInvariant(RHS, L))
- break;
+ !isLoopInvariant(RHS, L)) {
+ // Otherwise, perform the addition in a wider type, to avoid overflow.
+ auto *OldType = dyn_cast<IntegerType>(LHS->getType());
+ if (!OldType)
+ break;
+ auto *NewType =
+ Type::getIntNTy(OldType->getContext(), OldType->getBitWidth() * 2);
+ if (ICmpInst::isSigned(Pred)) {
+ LHS = getSignExtendExpr(LHS, NewType);
+ RHS = getSignExtendExpr(RHS, NewType);
+ } else {
+ LHS = getZeroExtendExpr(LHS, NewType);
+ RHS = getZeroExtendExpr(RHS, NewType);
+ }
+ }
RHS = getAddExpr(getOne(RHS->getType()), RHS);
[[fallthrough]];
case ICmpInst::ICMP_SLT:
diff --git a/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll b/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
index 2117c779f4b37..e9faf98eee449 100644
--- a/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
+++ b/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll
@@ -4,13 +4,14 @@
define void @ule_from_zero(i32 %M, i32 %N) {
; CHECK-LABEL: 'ule_from_zero'
; CHECK-NEXT: Determining loop execution counts for: @ule_from_zero
-; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
-; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT: Loop %loop: <multiple exits> backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
+; CHECK-NEXT: exit count for loop: (1 + (zext i32 %M to i64))<nuw><nsw>
; CHECK-NEXT: exit count for latch: %N
-; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 -1
-; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %N
-; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295
+; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
+; CHECK-NEXT: symbolic max exit count for loop: (1 + (zext i32 %M to i64))<nuw><nsw>
; CHECK-NEXT: symbolic max exit count for latch: %N
+; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
br label %loop
@@ -61,13 +62,14 @@ exit:
define void @ule_from_unknown(i32 %M, i32 %N, i32 %S) {
; CHECK-LABEL: 'ule_from_unknown'
; CHECK-NEXT: Determining loop execution counts for: @ule_from_unknown
-; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
-; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT: Loop %loop: <multiple exits> backedge-taken count is (((-1 * (zext i32 %S to i64))<nsw> + ((zext i32 %S to i64) umax (1 + (zext i32 %M to i64))<nuw><nsw>)) umin_seq (zext i32 ((-1 * %S) + %N) to i64))
+; CHECK-NEXT: exit count for loop: ((-1 * (zext i32 %S to i64))<nsw> + ((zext i32 %S to i64) umax (1 + (zext i32 %M to i64))<nuw><nsw>))
; CHECK-NEXT: exit count for latch: ((-1 * %S) + %N)
-; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 -1
-; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-1 * %S) + %N)
-; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295
+; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (((-1 * (zext i32 %S to i64))<nsw> + ((zext i32 %S to i64) umax (1 + (zext i32 %M to i64))<nuw><nsw>)) umin_seq (zext i32 ((-1 * %S) + %N) to i64))
+; CHECK-NEXT: symbolic max exit count for loop: ((-1 * (zext i32 %S to i64))<nsw> + ((zext i32 %S to i64) umax (1 + (zext i32 %M to i64))<nuw><nsw>))
; CHECK-NEXT: symbolic max exit count for latch: ((-1 * %S) + %N)
+; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
br label %loop
@@ -96,6 +98,9 @@ define void @ule_from_zero_no_nuw(i32 %M, i32 %N) {
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is %N
; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
; CHECK-NEXT: symbolic max exit count for latch: %N
+; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))<nuw><nsw>)
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {0,+,1}<%loop> Added Flags: <nusw>
;
entry:
br label %loop
@@ -117,13 +122,14 @@ exit:
define void @sle_from_int_min(i32 %M, i32 %N) {
; CHECK-LABEL: 'sle_from_int_min'
; CHECK-NEXT: Determining loop execution counts for: @sle_from_int_min
-; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
-; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT: Loop %loop: <multiple exits> backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
+; CHECK-NEXT: exit count for loop: (2147483649 + (sext i32 %M to i64))<nsw>
; CHECK-NEXT: exit count for latch: (-2147483648 + %N)
-; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 -1
-; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-2147483648 + %N)
-; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295
+; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
+; CHECK-NEXT: symbolic max exit count for loop: (2147483649 + (sext i32 %M to i64))<nsw>
; CHECK-NEXT: symbolic max exit count for latch: (-2147483648 + %N)
+; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
br label %loop
@@ -174,13 +180,14 @@ exit:
define void @sle_from_unknown(i32 %M, i32 %N, i32 %S) {
; CHECK-LABEL: 'sle_from_unknown'
; CHECK-NEXT: Determining loop execution counts for: @sle_from_unknown
-; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
-; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT: Loop %loop: <multiple exits> backedge-taken count is (((-1 * (sext i32 %S to i64))<nsw> + ((sext i32 %S to i64) smax (1 + (sext i32 %M to i64))<nsw>)) umin_seq (zext i32 ((-1 * %S) + %N) to i64))
+; CHECK-NEXT: exit count for loop: ((-1 * (sext i32 %S to i64))<nsw> + ((sext i32 %S to i64) smax (1 + (sext i32 %M to i64))<nsw>))
; CHECK-NEXT: exit count for latch: ((-1 * %S) + %N)
-; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 -1
-; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-1 * %S) + %N)
-; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4294967295
+; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (((-1 * (sext i32 %S to i64))<nsw> + ((sext i32 %S to i64) smax (1 + (sext i32 %M to i64))<nsw>)) umin_seq (zext i32 ((-1 * %S) + %N) to i64))
+; CHECK-NEXT: symbolic max exit count for loop: ((-1 * (sext i32 %S to i64))<nsw> + ((sext i32 %S to i64) smax (1 + (sext i32 %M to i64))<nsw>))
; CHECK-NEXT: symbolic max exit count for latch: ((-1 * %S) + %N)
+; CHECK-NEXT: Loop %loop: Trip multiple is 1
;
entry:
br label %loop
@@ -209,6 +216,9 @@ define void @sle_from_int_min_no_nsw(i32 %M, i32 %N) {
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-2147483648 + %N)
; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
; CHECK-NEXT: symbolic max exit count for latch: (-2147483648 + %N)
+; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))<nsw>)
+; CHECK-NEXT: Predicates:
+; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: <nssw>
;
entry:
br label %loop
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-le.ll b/llvm/test/CodeGen/PowerPC/ctrloop-le.ll
index 599e540e898a7..08ecd8970d836 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-le.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-le.ll
@@ -293,8 +293,7 @@ for.end: ; preds = %for.body, %entry
; CHECK: test_pos1_rr_sle
-; FIXME: Support this loop!
-; CHECK-NOT: bdnz
+; CHECK: bdnz
; a < b
define void @test_pos1_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
entry:
@@ -323,8 +322,7 @@ for.end: ; preds = %for.body, %entry
; CHECK: test_pos2_rr_sle
-; FIXME: Support this loop!
-; CHECK-NOT: bdnz
+; CHECK: bdnz
; a < b
define void @test_pos2_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
entry:
@@ -353,8 +351,7 @@ for.end: ; preds = %for.body, %entry
; CHECK: test_pos4_rr_sle
-; FIXME: Support this loop!
-; CHECK-NOT: bdnz
+; CHECK: bdnz
; a < b
define void @test_pos4_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
entry:
@@ -383,8 +380,7 @@ for.end: ; preds = %for.body, %entry
; CHECK: test_pos8_rr_sle
-; FIXME: Support this loop!
-; CHECK-NOT: bdnz
+; CHECK: bdnz
; a < b
define void @test_pos8_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
entry:
@@ -413,8 +409,7 @@ for.end: ; preds = %for.body, %entry
; CHECK: test_pos16_rr_sle
-; FIXME: Support this loop!
-; CHECK-NOT: bdnz
+; CHECK: bdnz
; a < b
define void @test_pos16_rr_sle(ptr nocapture %p, i32 %a, i32 %b) nounwind {
entry:
diff --git a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll
index c5f656c870a23..99541b398226a 100644
--- a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll
@@ -265,16 +265,17 @@ define i32 @test5(ptr %a, i32 %b) {
; CHECK-LABEL: @test5(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[SUM_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[INDVARS_IV]], [[TMP0]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[TMP1]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP2]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br label [[FOR_COND]]
; CHECK: for.end:
@@ -349,22 +350,23 @@ define i32 @test7(ptr %a, i32 %b) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[B:%.*]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[B]], i32 -1)
-; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SMAX]], 2
-; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SMAX]], 2
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[SUM_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[INDVARS_IV]], [[TMP0]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[TMP1]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP3]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND]], label [[FOR_END]]
+; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[EXITCOND2]], label [[FOR_COND]], label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0]], [[FOR_BODY]] ], [ [[SUM_0]], [[FOR_COND]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
``````````
</details>
https://github.com/llvm/llvm-project/pull/92206
More information about the llvm-commits mailing list