[llvm] df9c5bd - [SCEV] Support umin/smin in SCEVLoopGuardRewriter
Max Kazantsev via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 19 22:05:07 PST 2023
Author: Max Kazantsev
Date: 2023-02-20T13:05:00+07:00
New Revision: df9c5bd8d2e6a060b342aa4ac46a9c3313ba2282
URL: https://github.com/llvm/llvm-project/commit/df9c5bd8d2e6a060b342aa4ac46a9c3313ba2282
DIFF: https://github.com/llvm/llvm-project/commit/df9c5bd8d2e6a060b342aa4ac46a9c3313ba2282.diff
LOG: [SCEV] Support umin/smin in SCEVLoopGuardRewriter
Adds support for these SCEVs to cover more cases.
Differential Revision: https://reviews.llvm.org/D143259
Reviewed By: dmakogon, fhahn
Added:
Modified:
llvm/lib/Analysis/ScalarEvolution.cpp
llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 36cb779787066..de4d934174570 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -14965,6 +14965,20 @@ class SCEVLoopGuardRewriter : public SCEVRewriteVisitor<SCEVLoopGuardRewriter> {
Expr);
return I->second;
}
+
+ const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) {
+ auto I = Map.find(Expr);
+ if (I == Map.end())
+ return SCEVRewriteVisitor<SCEVLoopGuardRewriter>::visitUMinExpr(Expr);
+ return I->second;
+ }
+
+ const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) {
+ auto I = Map.find(Expr);
+ if (I == Map.end())
+ return SCEVRewriteVisitor<SCEVLoopGuardRewriter>::visitSMinExpr(Expr);
+ return I->second;
+ }
};
const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
index 5d7548de7a7d8..b7792d9f97c16 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll
@@ -90,7 +90,7 @@ exit:
ret i32 0
}
-; TODO: Same as rewrite_zext_min_max, but the loop is guarded by narrow check.
+; Same as rewrite_zext_min_max, but the loop is guarded by narrow check.
define i32 @rewrite_zext_min_max_narrow_check(i32 %N, ptr %arr) {
; CHECK-LABEL: 'rewrite_zext_min_max_narrow_check'
; CHECK-NEXT: Classifying expressions for: @rewrite_zext_min_max_narrow_check
@@ -101,14 +101,14 @@ define i32 @rewrite_zext_min_max_narrow_check(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %ext, 28
; CHECK-NEXT: --> (4 * ((zext i32 (16 umin %N) to i64) /u 4))<nuw><nsw> U: [0,17) S: [0,17)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (4 * ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
-; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><%loop> U: [4,-3) S: [-9223372036854775808,9223372036854775805) Exits: (4 + (4 * ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_zext_min_max_narrow_check
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))<nuw><nsw>)<nsw> /u 4)
-; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 4611686018427387903
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 3
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (4 * ((zext i32 (16 umin %N) to i64) /u 4))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Predicates:
@@ -137,7 +137,7 @@ exit:
ret i32 0
}
-; TODO: This is same as rewrite_zext_min_max, but zext and umin are swapped.
+; This is same as rewrite_zext_min_max, but zext and umin are swapped.
; It should be able to prove the same exit count.
define i32 @rewrite_min_max_zext(i32 %N, ptr %arr) {
; CHECK-LABEL: 'rewrite_min_max_zext'
@@ -149,14 +149,14 @@ define i32 @rewrite_min_max_zext(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %umin, 28
; CHECK-NEXT: --> (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw> U: [0,17) S: [0,17)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
-; CHECK-NEXT: --> {%arr,+,16}<%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nuw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><%loop> U: [4,-3) S: [-9223372036854775808,9223372036854775805) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_min_max_zext
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4)
-; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 4611686018427387903
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 3
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (4 * ((16 umin (zext i32 %N to i64)) /u 4))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Predicates:
@@ -233,7 +233,7 @@ exit:
ret i32 0
}
-; TODO: same as rewrite_sext_min_max, but the loop is guarded by narrow check.
+; same as rewrite_sext_min_max, but the loop is guarded by narrow check.
; It should be able to prove the same exit count.
define i32 @rewrite_sext_min_max_narrow_check(i32 %N, ptr %arr) {
; CHECK-LABEL: 'rewrite_sext_min_max_narrow_check'
@@ -245,14 +245,14 @@ define i32 @rewrite_sext_min_max_narrow_check(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %ext, 28
; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))<nuw><nsw> U: [0,29) S: [0,29)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,-9223372036854775808) S: [0,9223372036854775805) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nsw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,-9223372036854775808) S: [4,9223372036854775805) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_sext_min_max_narrow_check
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)
-; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 4611686018427387903
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 3
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((sext i32 (16 smin %N) to i64) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Predicates:
@@ -281,7 +281,7 @@ exit:
ret i32 0
}
-; TODO: This is a signed version of rewrite_min_max_zext.
+; This is a signed version of rewrite_min_max_zext.
; It should be able to prove the same exit count.
define i32 @rewrite_min_max_sext(i32 %N, ptr %arr) {
; CHECK-LABEL: 'rewrite_min_max_sext'
@@ -293,14 +293,14 @@ define i32 @rewrite_min_max_sext(i32 %N, ptr %arr) {
; CHECK-NEXT: %n.vec = and i64 %smin, 28
; CHECK-NEXT: --> (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw> U: [0,29) S: [0,29)
; CHECK-NEXT: %index = phi i64 [ 0, %loop.ph ], [ %index.next, %loop ]
-; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,-9223372036854775808) S: [0,9223372036854775805) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {0,+,4}<nuw><nsw><%loop> U: [0,13) S: [0,13) Exits: (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr inbounds i32, ptr %arr, i64 %index
; CHECK-NEXT: --> {%arr,+,16}<nuw><%loop> U: full-set S: full-set Exits: ((16 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)) + %arr) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index.next = add nsw i64 %index, 4
-; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,-9223372036854775808) S: [4,9223372036854775805) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {4,+,4}<nuw><nsw><%loop> U: [4,17) S: [4,17) Exits: (4 + (4 * ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4))<nuw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @rewrite_min_max_sext
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)
-; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 4611686018427387903
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 3
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (4 * (zext i3 (trunc i64 ((16 smin (sext i32 %N to i64)) /u 4) to i3) to i64))<nuw><nsw>)<nsw> /u 4)
; CHECK-NEXT: Predicates:
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
index 115f1cb6b3dfd..431e604358cbc 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-minmax.ll
@@ -10,12 +10,12 @@ define void @nomulitply(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: %cond = select i1 %cmp, i32 %a, i32 %b
; CHECK-NEXT: --> (%a umin %b) U: full-set S: full-set
; CHECK-NEXT: %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + (%a umin %b)) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + (%a umin %b)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %inc = add nuw nsw i32 %i.08, 1
-; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,0) S: [1,0) Exits: (%a umin %b) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: (%a umin %b) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @nomulitply
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + (%a umin %b))
-; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is -2
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + (%a umin %b))
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + (%a umin %b))
; CHECK-NEXT: Predicates:
@@ -56,16 +56,16 @@ define void @umin(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1
; CHECK-NEXT: --> ((2 * %a) umin (4 * %b)) U: [0,-3) S: [-2147483648,2147483647)
; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a) umin (4 * %b))) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a) umin (4 * %b))) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1
-; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-3) S: [1,-3) Exits: ((2 * %a) umin (4 * %b)) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a) umin (4 * %b)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @umin
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a) umin (4 * %b)))
-; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is -5
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a) umin (4 * %b)))
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + ((2 * %a) umin (4 * %b)))
; CHECK-NEXT: Predicates:
-; CHECK: Loop %for.body: Trip multiple is 2
+; CHECK: Loop %for.body: Trip multiple is 1
;
; void umin(unsigned a, unsigned b) {
; a *= 2;
@@ -156,16 +156,16 @@ define void @smin(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1
; CHECK-NEXT: --> ((2 * %a)<nsw> smin (4 * %b)<nsw>) U: [0,-1) S: [-2147483648,2147483645)
; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>)) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1
-; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-1) S: [1,-1) Exits: ((2 * %a)<nsw> smin (4 * %b)<nsw>) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((2 * %a)<nsw> smin (4 * %b)<nsw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @smin
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>))
-; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is -3
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>))
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + ((2 * %a)<nsw> smin (4 * %b)<nsw>))
; CHECK-NEXT: Predicates:
-; CHECK: Loop %for.body: Trip multiple is 2
+; CHECK: Loop %for.body: Trip multiple is 1
;
; void smin(signed a, signed b) {
; a *= 2;
@@ -292,12 +292,12 @@ define void @umin-3and6(i32 noundef %a, i32 noundef %b) {
; CHECK-NEXT: %cond = select i1 %cmp, i32 %mul, i32 %mul1
; CHECK-NEXT: --> ((3 * %a) umin (6 * %b)) U: [0,-1) S: full-set
; CHECK-NEXT: %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (-1 + ((3 * %a) umin (6 * %b))) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%for.body> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + ((3 * %a) umin (6 * %b))) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %inc = add nuw nsw i32 %i.011, 1
-; CHECK-NEXT: --> {1,+,1}<nuw><%for.body> U: [1,-1) S: [1,-1) Exits: ((3 * %a) umin (6 * %b)) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%for.body> U: [1,-2147483648) S: [1,-2147483648) Exits: ((3 * %a) umin (6 * %b)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @umin-3and6
; CHECK-NEXT: Loop %for.body: backedge-taken count is (-1 + ((3 * %a) umin (6 * %b)))
-; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is -3
+; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 2147483646
; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + ((3 * %a) umin (6 * %b)))
; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + ((3 * %a) umin (6 * %b)))
; CHECK-NEXT: Predicates:
diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
index 879d110c72e3d..2d7126db12fab 100644
--- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
@@ -335,10 +335,10 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT: .pad #24
-; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; CHECK-NEXT: .pad #16
+; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: mov r12, r1
; CHECK-NEXT: subs r1, r0, #1
; CHECK-NEXT: sbcs r1, r12, #0
@@ -346,54 +346,50 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
; CHECK-NEXT: @ %bb.1: @ %for.cond2.preheader.lr.ph
; CHECK-NEXT: movs r3, #1
; CHECK-NEXT: cmp r2, #1
-; CHECK-NEXT: csel r7, r2, r3, lt
-; CHECK-NEXT: mov r10, r2
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: cmp r7, #3
+; CHECK-NEXT: csel lr, r2, r3, lt
+; CHECK-NEXT: movw r4, #43691
+; CHECK-NEXT: mov r1, lr
+; CHECK-NEXT: cmp.w lr, #3
; CHECK-NEXT: it ls
; CHECK-NEXT: movls r1, #3
-; CHECK-NEXT: movw r2, #43691
-; CHECK-NEXT: subs r1, r1, r7
-; CHECK-NEXT: movt r2, #43690
+; CHECK-NEXT: movt r4, #43690
+; CHECK-NEXT: sub.w r1, r1, lr
+; CHECK-NEXT: ldr r6, [sp, #128]
; CHECK-NEXT: adds r1, #2
-; CHECK-NEXT: ldr r4, [sp, #120]
-; CHECK-NEXT: movw r11, :lower16:c
-; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: umull r1, r2, r1, r2
-; CHECK-NEXT: movt r11, :upper16:c
+; CHECK-NEXT: movw r8, :lower16:c
+; CHECK-NEXT: movt r8, :upper16:c
+; CHECK-NEXT: mov.w r9, #12
+; CHECK-NEXT: umull r1, r4, r1, r4
+; CHECK-NEXT: @ implicit-def: $r10
+; CHECK-NEXT: @ implicit-def: $r5
+; CHECK-NEXT: @ implicit-def: $r11
+; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: movs r1, #4
-; CHECK-NEXT: @ implicit-def: $r8
-; CHECK-NEXT: @ implicit-def: $r9
-; CHECK-NEXT: movs r5, #12
-; CHECK-NEXT: strd r12, r0, [sp, #4] @ 8-byte Folded Spill
-; CHECK-NEXT: add.w r6, r3, r2, lsr #1
-; CHECK-NEXT: add.w r1, r1, r2, lsr #1
-; CHECK-NEXT: movw r2, #65532
-; CHECK-NEXT: vdup.32 q6, r6
-; CHECK-NEXT: movt r2, #32767
-; CHECK-NEXT: ands r1, r2
-; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: subs r1, #4
-; CHECK-NEXT: add.w r1, r3, r1, lsr #2
-; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: strd r2, r12, [sp, #4] @ 8-byte Folded Spill
+; CHECK-NEXT: add.w r3, r3, r4, lsr #1
+; CHECK-NEXT: add.w r1, r1, r4, lsr #1
+; CHECK-NEXT: movw r4, #65532
+; CHECK-NEXT: vdup.32 q6, r3
+; CHECK-NEXT: movt r4, #32767
+; CHECK-NEXT: and.w r7, r1, r4
; CHECK-NEXT: adr r1, .LCPI1_0
+; CHECK-NEXT: vdup.32 q7, r3
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: adr r1, .LCPI1_1
; CHECK-NEXT: vldrw.u32 q5, [r1]
-; CHECK-NEXT: vadd.i32 q4, q0, r7
-; CHECK-NEXT: @ implicit-def: $r7
+; CHECK-NEXT: vadd.i32 q4, q0, lr
; CHECK-NEXT: b .LBB1_4
; CHECK-NEXT: .LBB1_2: @ %for.body6.preheader
; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: cmn.w r9, #4
+; CHECK-NEXT: mov r0, r11
+; CHECK-NEXT: cmn.w r11, #4
; CHECK-NEXT: it le
; CHECK-NEXT: mvnle r0, #3
; CHECK-NEXT: movw r2, #18725
; CHECK-NEXT: adds r0, #6
; CHECK-NEXT: movt r2, #9362
-; CHECK-NEXT: sub.w r1, r0, r9
-; CHECK-NEXT: movs r7, #0
+; CHECK-NEXT: sub.w r1, r0, r11
+; CHECK-NEXT: mov.w r10, #0
; CHECK-NEXT: umull r2, r3, r1, r2
; CHECK-NEXT: subs r2, r1, r3
; CHECK-NEXT: add.w r2, r3, r2, lsr #1
@@ -402,14 +398,14 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
; CHECK-NEXT: sub.w r2, r3, r2, lsr #2
; CHECK-NEXT: subs r1, r2, r1
; CHECK-NEXT: add r0, r1
-; CHECK-NEXT: add.w r9, r0, #7
-; CHECK-NEXT: ldrd r12, r0, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT: add.w r11, r0, #7
+; CHECK-NEXT: ldrd r12, r0, [sp, #8] @ 8-byte Folded Reload
; CHECK-NEXT: .LBB1_3: @ %for.cond.cleanup5
; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1
-; CHECK-NEXT: add.w r8, r8, #2
-; CHECK-NEXT: subs.w r1, r8, r0
-; CHECK-NEXT: asr.w r2, r8, #31
-; CHECK-NEXT: sbcs.w r1, r2, r12
+; CHECK-NEXT: adds r5, #2
+; CHECK-NEXT: subs r1, r5, r0
+; CHECK-NEXT: asr.w r3, r5, #31
+; CHECK-NEXT: sbcs.w r1, r3, r12
; CHECK-NEXT: bge.w .LBB1_28
; CHECK-NEXT: .LBB1_4: @ %for.cond2.preheader
; CHECK-NEXT: @ =>This Loop Header: Depth=1
@@ -417,29 +413,33 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
; CHECK-NEXT: @ Child Loop BB1_8 Depth 2
; CHECK-NEXT: @ Child Loop BB1_10 Depth 3
; CHECK-NEXT: @ Child Loop BB1_12 Depth 3
-; CHECK-NEXT: cmp.w r9, #2
+; CHECK-NEXT: cmp.w r11, #2
; CHECK-NEXT: bgt .LBB1_3
; CHECK-NEXT: @ %bb.5: @ %for.body6.lr.ph
; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #5
+; CHECK-NEXT: cmp.w lr, #5
; CHECK-NEXT: bhi .LBB1_15
; CHECK-NEXT: @ %bb.6: @ %for.body6.us.preheader
; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1
-; CHECK-NEXT: ldrd r2, r3, [sp, #112]
+; CHECK-NEXT: ldrd r2, r3, [sp, #120]
; CHECK-NEXT: movs r0, #32
; CHECK-NEXT: movs r1, #0
+; CHECK-NEXT: mov r4, r7
+; CHECK-NEXT: mov r7, lr
; CHECK-NEXT: bl __aeabi_ldivmod
-; CHECK-NEXT: ldrd r12, r0, [sp, #4] @ 8-byte Folded Reload
; CHECK-NEXT: vdup.32 q0, r2
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldrd r2, r12, [sp, #4] @ 8-byte Folded Reload
+; CHECK-NEXT: mov lr, r7
+; CHECK-NEXT: mov r7, r4
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: b .LBB1_8
; CHECK-NEXT: .LBB1_7: @ %for.cond.cleanup17.us
; CHECK-NEXT: @ in Loop: Header=BB1_8 Depth=2
-; CHECK-NEXT: add.w r9, r3, #7
+; CHECK-NEXT: add.w r11, r3, #7
; CHECK-NEXT: cmn.w r3, #4
-; CHECK-NEXT: mov.w r7, #0
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: mov.w r10, #0
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: bge .LBB1_3
; CHECK-NEXT: .LBB1_8: @ %for.body6.us
; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1
@@ -447,103 +447,106 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
; CHECK-NEXT: @ Child Loop BB1_10 Depth 3
; CHECK-NEXT: @ Child Loop BB1_12 Depth 3
; CHECK-NEXT: movs r1, #0
-; CHECK-NEXT: cmp.w r10, #0
-; CHECK-NEXT: beq .LBB1_11
+; CHECK-NEXT: cbz r2, .LBB1_11
; CHECK-NEXT: @ %bb.9: @ %for.body13.us51.preheader
; CHECK-NEXT: @ in Loop: Header=BB1_8 Depth=2
-; CHECK-NEXT: movw r2, :lower16:a
+; CHECK-NEXT: movw r4, :lower16:a
; CHECK-NEXT: vmov q1, q4
-; CHECK-NEXT: movt r2, :upper16:a
-; CHECK-NEXT: str r1, [r2]
-; CHECK-NEXT: movw r2, :lower16:b
-; CHECK-NEXT: movt r2, :upper16:b
-; CHECK-NEXT: str r1, [r2]
-; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: dlstp.32 lr, r6
+; CHECK-NEXT: movt r4, :upper16:a
+; CHECK-NEXT: str r1, [r4]
+; CHECK-NEXT: movw r4, :lower16:b
+; CHECK-NEXT: movt r4, :upper16:b
+; CHECK-NEXT: str r1, [r4]
+; CHECK-NEXT: mov r4, r7
; CHECK-NEXT: .LBB1_10: @ %vector.body111
; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1
; CHECK-NEXT: @ Parent Loop BB1_8 Depth=2
; CHECK-NEXT: @ => This Inner Loop Header: Depth=3
+; CHECK-NEXT: vqadd.u32 q2, q5, r1
+; CHECK-NEXT: subs r4, #4
+; CHECK-NEXT: vcmp.u32 hi, q7, q2
; CHECK-NEXT: vshl.i32 q2, q1, #2
-; CHECK-NEXT: vadd.i32 q2, q2, r11
-; CHECK-NEXT: vadd.i32 q1, q1, r5
-; CHECK-NEXT: vstrw.32 q0, [q2]
-; CHECK-NEXT: letp lr, .LBB1_10
+; CHECK-NEXT: add.w r1, r1, #4
+; CHECK-NEXT: vadd.i32 q2, q2, r8
+; CHECK-NEXT: vadd.i32 q1, q1, r9
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vstrwt.32 q0, [q2]
+; CHECK-NEXT: bne .LBB1_10
; CHECK-NEXT: b .LBB1_13
; CHECK-NEXT: .LBB1_11: @ %vector.body.preheader
; CHECK-NEXT: @ in Loop: Header=BB1_8 Depth=2
-; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: mov r4, r7
; CHECK-NEXT: vmov q1, q4
; CHECK-NEXT: .LBB1_12: @ %vector.body
; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1
; CHECK-NEXT: @ Parent Loop BB1_8 Depth=2
; CHECK-NEXT: @ => This Inner Loop Header: Depth=3
; CHECK-NEXT: vqadd.u32 q2, q5, r1
-; CHECK-NEXT: subs r2, #4
+; CHECK-NEXT: subs r4, #4
; CHECK-NEXT: vcmp.u32 hi, q6, q2
; CHECK-NEXT: vshl.i32 q2, q1, #2
; CHECK-NEXT: add.w r1, r1, #4
-; CHECK-NEXT: vadd.i32 q2, q2, r11
-; CHECK-NEXT: vadd.i32 q1, q1, r5
+; CHECK-NEXT: vadd.i32 q2, q2, r8
+; CHECK-NEXT: vadd.i32 q1, q1, r9
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q0, [q2]
; CHECK-NEXT: bne .LBB1_12
; CHECK-NEXT: .LBB1_13: @ %for.cond9.for.cond15.preheader_crit_edge.us
; CHECK-NEXT: @ in Loop: Header=BB1_8 Depth=2
-; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: beq .LBB1_7
; CHECK-NEXT: @ %bb.14: @ %for.cond9.for.cond15.preheader_crit_edge.us
; CHECK-NEXT: @ in Loop: Header=BB1_8 Depth=2
-; CHECK-NEXT: eor r1, r7, #1
+; CHECK-NEXT: eor r1, r10, #1
; CHECK-NEXT: lsls r1, r1, #31
; CHECK-NEXT: bne .LBB1_7
; CHECK-NEXT: b .LBB1_26
; CHECK-NEXT: .LBB1_15: @ %for.body6.lr.ph.split
; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1
-; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: beq.w .LBB1_2
; CHECK-NEXT: @ %bb.16: @ in Loop: Header=BB1_4 Depth=1
-; CHECK-NEXT: ldrd r12, r0, [sp, #4] @ 8-byte Folded Reload
-; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: ldrd r12, r0, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: .LBB1_17: @ %for.body6.us60
; CHECK-NEXT: @ Parent Loop BB1_4 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lsls r1, r7, #31
+; CHECK-NEXT: lsls.w r1, r10, #31
; CHECK-NEXT: bne .LBB1_27
; CHECK-NEXT: @ %bb.18: @ %for.cond.cleanup17.us63
; CHECK-NEXT: @ in Loop: Header=BB1_17 Depth=2
-; CHECK-NEXT: cmn.w r2, #4
+; CHECK-NEXT: cmn.w r3, #4
; CHECK-NEXT: bge .LBB1_22
; CHECK-NEXT: @ %bb.19: @ %for.cond.cleanup17.us63.1
; CHECK-NEXT: @ in Loop: Header=BB1_17 Depth=2
-; CHECK-NEXT: cmn.w r2, #12
+; CHECK-NEXT: cmn.w r3, #12
; CHECK-NEXT: bgt .LBB1_23
; CHECK-NEXT: @ %bb.20: @ %for.cond.cleanup17.us63.2
; CHECK-NEXT: @ in Loop: Header=BB1_17 Depth=2
-; CHECK-NEXT: cmn.w r2, #19
+; CHECK-NEXT: cmn.w r3, #19
; CHECK-NEXT: bgt .LBB1_24
; CHECK-NEXT: @ %bb.21: @ %for.cond.cleanup17.us63.3
; CHECK-NEXT: @ in Loop: Header=BB1_17 Depth=2
-; CHECK-NEXT: add.w r9, r2, #28
-; CHECK-NEXT: cmn.w r2, #25
-; CHECK-NEXT: mov.w r7, #0
-; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: add.w r11, r3, #28
+; CHECK-NEXT: cmn.w r3, #25
+; CHECK-NEXT: mov.w r10, #0
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: blt .LBB1_17
; CHECK-NEXT: b .LBB1_3
; CHECK-NEXT: .LBB1_22: @ %for.cond.cleanup5.loopexit134.split.loop.exit139
; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1
-; CHECK-NEXT: add.w r9, r2, #7
+; CHECK-NEXT: add.w r11, r3, #7
; CHECK-NEXT: b .LBB1_25
; CHECK-NEXT: .LBB1_23: @ %for.cond.cleanup5.loopexit134.split.loop.exit137
; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1
-; CHECK-NEXT: add.w r9, r2, #14
+; CHECK-NEXT: add.w r11, r3, #14
; CHECK-NEXT: b .LBB1_25
; CHECK-NEXT: .LBB1_24: @ %for.cond.cleanup5.loopexit134.split.loop.exit135
; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1
-; CHECK-NEXT: add.w r9, r2, #21
+; CHECK-NEXT: add.w r11, r3, #21
; CHECK-NEXT: .LBB1_25: @ %for.cond.cleanup5
; CHECK-NEXT: @ in Loop: Header=BB1_4 Depth=1
-; CHECK-NEXT: movs r7, #0
+; CHECK-NEXT: mov.w r10, #0
; CHECK-NEXT: b .LBB1_3
; CHECK-NEXT: .LBB1_26: @ %for.inc19.us
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
@@ -552,8 +555,8 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) {
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: b .LBB1_27
; CHECK-NEXT: .LBB1_28: @ %for.cond.cleanup
-; CHECK-NEXT: add sp, #24
-; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13}
+; CHECK-NEXT: add sp, #16
+; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; CHECK-NEXT: .p2align 4
More information about the llvm-commits
mailing list