[PATCH] D125792: [SCEV] Use nowrap flags to infer that start > start - stride in howManyLessThans

Tue May 17 07:34:02 PDT 2022

bsmith created this revision.
bsmith added reviewers: paulwalker-arm, peterwaller-arm.
Herald added a subscriber: hiraditya.
Herald added a project: All.
bsmith requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

If the induction variable being looked at has a nowrap increment, then we
can infer that start > start - stride, otherwise the value produced would
be poison.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D125792

Files:
  llvm/lib/Analysis/ScalarEvolution.cpp
  llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
  llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
  llvm/test/Transforms/LoopReroll/nonconst_lb.ll


Index: llvm/test/Transforms/LoopReroll/nonconst_lb.ll
===================================================================

--- llvm/test/Transforms/LoopReroll/nonconst_lb.ll
+++ llvm/test/Transforms/LoopReroll/nonconst_lb.ll
@@ -17,24 +17,22 @@
 ; CHECK-NEXT:    [[CMP34:%.*]] = icmp slt i32 [[M:%.*]], [[N:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP34]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[M]], 4
-; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N]], i32 [[TMP0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[SMAX]], -1
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], [[M]]
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 2
-; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw i32 [[TMP3]], 2
-; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i32 [[TMP4]], 3
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[M]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 2
+; CHECK-NEXT:    [[TMP4:%.*]] = add nuw nsw i32 [[TMP3]], 3
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[M]], [[INDVAR]]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[TMP7]], 2
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP6]]
+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[M]], [[INDVAR]]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[TMP6]], 2
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP5]]
 ; CHECK-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[TMP5]]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[TMP4]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end.loopexit:
 ; CHECK-NEXT:    br label [[FOR_END]]
Index: llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
===================================================================
--- llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
+++ llvm/test/Analysis/ScalarEvolution/trip-count-unknown-stride.ll
@@ -4,7 +4,7 @@
 ; that this is not an infinite loop with side effects.
 
 ; CHECK-LABEL: Determining loop execution counts for: @foo1
-; CHECK: backedge-taken count is ((-1 + (%n smax %s)) /u %s)
+; CHECK: backedge-taken count is ((-1 + %n) /u %s)
 
 ; We should have a conservative estimate for the max backedge taken count for
 ; loops with unknown stride.
Index: llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
===================================================================
--- llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
+++ llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
@@ -125,13 +125,13 @@
 ; CHECK-NEXT:    %init = getelementptr inbounds i32, i32* %startptr, i64 2000
 ; CHECK-NEXT:    --> (8000 + %startptr) U: full-set S: full-set
 ; CHECK-NEXT:    %iv = phi i32* [ %init, %entry ], [ %iv.next, %loop ]
-; CHECK-NEXT:    --> {(8000 + %startptr),+,4}<nuw><%loop> U: full-set S: full-set Exits: (8000 + (4 * ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + ((8004 + (ptrtoint i32* %startptr to i64)) umax (ptrtoint i32* %endptr to i64))) /u 4))<nuw> + %startptr) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(8000 + %startptr),+,4}<nuw><%loop> U: full-set S: full-set Exits: (8000 + (4 * ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + (ptrtoint i32* %endptr to i64)) /u 4))<nuw> + %startptr) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = getelementptr inbounds i32, i32* %iv, i64 1
-; CHECK-NEXT:    --> {(8004 + %startptr),+,4}<nuw><%loop> U: full-set S: full-set Exits: (8004 + (4 * ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + ((8004 + (ptrtoint i32* %startptr to i64)) umax (ptrtoint i32* %endptr to i64))) /u 4))<nuw> + %startptr) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(8004 + %startptr),+,4}<nuw><%loop> U: full-set S: full-set Exits: (8004 + (4 * ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + (ptrtoint i32* %endptr to i64)) /u 4))<nuw> + %startptr) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @pointer_iv_nowrap_guard
-; CHECK-NEXT:  Loop %loop: backedge-taken count is ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + ((8004 + (ptrtoint i32* %startptr to i64)) umax (ptrtoint i32* %endptr to i64))) /u 4)
+; CHECK-NEXT:  Loop %loop: backedge-taken count is ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + (ptrtoint i32* %endptr to i64)) /u 4)
 ; CHECK-NEXT:  Loop %loop: max backedge-taken count is 4611686018427387903
-; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + ((8004 + (ptrtoint i32* %startptr to i64)) umax (ptrtoint i32* %endptr to i64))) /u 4)
+; CHECK-NEXT:  Loop %loop: Predicated backedge-taken count is ((-8001 + (-1 * (ptrtoint i32* %startptr to i64)) + (ptrtoint i32* %endptr to i64)) /u 4)
 ; CHECK-NEXT:   Predicates:
 ; CHECK:       Loop %loop: Trip multiple is 1
 ;
Index: llvm/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- llvm/lib/Analysis/ScalarEvolution.cpp
+++ llvm/lib/Analysis/ScalarEvolution.cpp
@@ -12411,7 +12411,8 @@
   assert(isAvailableAtLoopEntry(OrigStart, L) && "Must be!");
   assert(isAvailableAtLoopEntry(OrigRHS, L) && "Must be!");
   // Can we prove (max(RHS,Start) > Start - Stride?
-  if (isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigStart) &&
+  if ((isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigStart) ||
+       NoWrap) &&
       isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigRHS)) {
     // In this case, we can use a refined formula for computing backedge taken
     // count.  The general formula remains:


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D125792.430058.patch
Type: text/x-patch
Size: 6508 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220517/a206d9b8/attachment.bin>