[llvm] 6afcc54 - [SCEV] Infer no-self-wrap via constant ranges

Wed Mar 22 12:06:38 PDT 2023

Author: Philip Reames
Date: 2023-03-22T12:06:28-07:00
New Revision: 6afcc54ac7d68fa2b28f0e7cbf9dc1d4ac7fb95e

URL: https://github.com/llvm/llvm-project/commit/6afcc54ac7d68fa2b28f0e7cbf9dc1d4ac7fb95e
DIFF: https://github.com/llvm/llvm-project/commit/6afcc54ac7d68fa2b28f0e7cbf9dc1d4ac7fb95e.diff

LOG: [SCEV] Infer no-self-wrap via constant ranges

Without this, pointer IVs in loops with small constant trip counts couldn't be proven no-self-wrap. This came up in a new LSR transform, but may also benefit other SCEV consumers as well.

Differential Revision: https://reviews.llvm.org/D146596

Added: 
    

Modified: 
    llvm/lib/Analysis/ScalarEvolution.cpp
    llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
    llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll
    llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
    llvm/test/Transforms/LoopRotate/pr51981-scev-problem.ll
    llvm/test/Transforms/LoopVersioning/lcssa.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index df525f4d6be7a..df872f61906c8 100644

--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4988,6 +4988,18 @@ ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
 
   SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap;
 
+  if (!AR->hasNoSelfWrap()) {
+    const SCEV *BECount = getConstantMaxBackedgeTakenCount(AR->getLoop());
+    if (const SCEVConstant *BECountMax = dyn_cast<SCEVConstant>(BECount)) {
+      ConstantRange StepCR = getSignedRange(AR->getStepRecurrence(*this));
+      const APInt &BECountAP = BECountMax->getAPInt();
+      unsigned NoOverflowBitWidth =
+        BECountAP.getActiveBits() + StepCR.getMinSignedBits();
+      if (NoOverflowBitWidth <= getTypeSizeInBits(AR->getType()))
+        Result = ScalarEvolution::setFlags(Result, SCEV::FlagNW);
+    }
+  }
+
   if (!AR->hasNoSignedWrap()) {
     ConstantRange AddRecRange = getSignedRange(AR);
     ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this));

diff  --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 82312de71c72c..fbdc436e0d37e 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6769,14 +6769,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
     // iteration. The simplest case to consider is a candidate IV which is
     // narrower than the trip count (and thus original IV), but this can
     // also happen due to non-unit strides on the candidate IVs.
-    // TODO: This check should be replaceable with PostInc->hasNoSelfWrap(),
-    // but in practice we appear to be missing inference for cases we should
-    // be able to catch.
-    ConstantRange StepCR = SE.getSignedRange(AddRec->getStepRecurrence(SE));
-    ConstantRange BECountCR = SE.getUnsignedRange(BECount);
-    unsigned NoOverflowBitWidth = BECountCR.getActiveBits() + StepCR.getMinSignedBits();
-    unsigned ARBitWidth = SE.getTypeSizeInBits(AddRec->getType());
-    if (NoOverflowBitWidth > ARBitWidth)
+    if (!AddRec->hasNoSelfWrap())
       continue;
 
     const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE);

diff  --git a/llvm/test/Analysis/ScalarEvolution/
diff erent-loops-recs.ll b/llvm/test/Analysis/ScalarEvolution/
diff erent-loops-recs.ll
index 44081f32d0af6..60b2e9d50dd59 100644
--- a/llvm/test/Analysis/ScalarEvolution/
diff erent-loops-recs.ll
+++ b/llvm/test/Analysis/ScalarEvolution/
diff erent-loops-recs.ll
@@ -94,7 +94,7 @@ define void @test_01(i32 %a, i32 %b) {
 ; CHECK:       %s3 = add i32 %is1, %phi5
 ; CHECK-NEXT:  -->  {{{{}}(59 + (2 * %a) + %b),+,6}<%loop1>,+,2}<nw><%loop2>
 ; CHECK:       %s4 = add i32 %phi2, %is2
-; CHECK-NEXT:  -->  {{{{}}(159 + (2 * %b)),+,2}<%loop1>,+,6}<%loop2>
+; CHECK-NEXT:  -->  {{{{}}(159 + (2 * %b)),+,2}<nw><%loop1>,+,6}<%loop2>
 ; CHECK:       %s5 = add i32 %is1, %is2
 ; CHECK-NEXT:  -->  {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2>
 ; CHECK:       %s6 = add i32 %is2, %is1

diff  --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
index e1acec162d3c8..d4d3a9e13e277 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
@@ -1633,9 +1633,9 @@ define i32 @ptr_induction_ult_1(ptr %a, ptr %b) {
 ; CHECK-LABEL: 'ptr_induction_ult_1'
 ; CHECK-NEXT:  Classifying expressions for: @ptr_induction_ult_1
 ; CHECK-NEXT:    %ptr.iv = phi ptr [ %ptr.iv.next, %loop ], [ %a, %entry ]
-; CHECK-NEXT:    --> {%a,+,4}<%loop> U: full-set S: full-set Exits: %a LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {%a,+,4}<nw><%loop> U: full-set S: full-set Exits: %a LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %ptr.iv.next = getelementptr i32, ptr %ptr.iv, i64 1
-; CHECK-NEXT:    --> {(4 + %a),+,4}<%loop> U: full-set S: full-set Exits: (4 + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(4 + %a),+,4}<nw><%loop> U: full-set S: full-set Exits: (4 + %a) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @ptr_induction_ult_1
 ; CHECK-NEXT:  Loop %loop: backedge-taken count is 0
 ; CHECK-NEXT:  Loop %loop: constant max backedge-taken count is 0
@@ -1686,18 +1686,17 @@ exit:
   ret i32 0
 }
 
-; TODO: The pointer induction variable can be implied No Self Wrap.
 define void @gep_addrec_nw(ptr %a) {
 ; CHECK-LABEL: 'gep_addrec_nw'
 ; CHECK-NEXT:  Classifying expressions for: @gep_addrec_nw
 ; CHECK-NEXT:    %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %a, %entry ]
-; CHECK-NEXT:    --> {%a,+,4}<%for.body> U: full-set S: full-set Exits: (1512 + %a) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {%a,+,4}<nw><%for.body> U: full-set S: full-set Exits: (1512 + %a) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
 ; CHECK-NEXT:    --> {379,+,-1}<nsw><%for.body> U: [1,380) S: [1,380) Exits: 1 LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %lsr.iv.next = add nsw i64 %lsr.iv, -1
 ; CHECK-NEXT:    --> {378,+,-1}<nsw><%for.body> U: [0,379) S: [0,379) Exits: 0 LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4
-; CHECK-NEXT:    --> {(4 + %a),+,4}<%for.body> U: full-set S: full-set Exits: (1516 + %a) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {(4 + %a),+,4}<nw><%for.body> U: full-set S: full-set Exits: (1516 + %a) LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @gep_addrec_nw
 ; CHECK-NEXT:  Loop %for.body: backedge-taken count is 378
 ; CHECK-NEXT:  Loop %for.body: constant max backedge-taken count is 378

diff  --git a/llvm/test/Transforms/LoopRotate/pr51981-scev-problem.ll b/llvm/test/Transforms/LoopRotate/pr51981-scev-problem.ll
index dd5031c56722f..2c2c88b6acb20 100644
--- a/llvm/test/Transforms/LoopRotate/pr51981-scev-problem.ll
+++ b/llvm/test/Transforms/LoopRotate/pr51981-scev-problem.ll
@@ -22,7 +22,7 @@
 ; CHECK-SCEV:   %narrow = trunc i32 %wide to i16
 ; CHECK-SCEV:   -->  (trunc i32 %wide to i16) U: full-set S: full-set               Exits: <<Unknown>>              LoopDispositions: { %loop.outer.header: Variant, %loop.inner: Invariant }
 ; CHECK-SCEV:   %iv = phi i16 [ %narrow, %loop.inner.ph ], [ %iv.plus, %loop.inner ]
-; CHECK-SCEV:   -->  {(trunc i32 %wide to i16),+,1}<%loop.inner> U: full-set S: full-set           Exits: (-1 + (700 umax (1 + (trunc i32 %wide to i16))))               LoopDispositions: { %loop.inner: Computable, %loop.outer.header: Variant }
+; CHECK-SCEV:   -->  {(trunc i32 %wide to i16),+,1}<nw><%loop.inner> U: full-set S: full-set           Exits: (-1 + (700 umax (1 + (trunc i32 %wide to i16))))               LoopDispositions: { %loop.inner: Computable, %loop.outer.header: Variant }
 ;
 ; CHECK-SCEV: Classifying expressions for: @test_function
 ; CHECK-SCEV:   %wide1 = load i32, ptr @offset, align 1
@@ -32,7 +32,7 @@
 ; CHECK-SCEV:   %narrow = trunc i32 %wide2 to i16
 ; CHECK-SCEV:   -->  (trunc i32 %wide2 to i16) U: full-set S: full-set               Exits: <<Unknown>>              LoopDispositions: { %loop.inner.ph: Variant, %loop.inner: Invariant }
 ; CHECK-SCEV:   %iv = phi i16 [ %narrow, %loop.inner.ph ], [ %iv.plus, %loop.inner ]
-; CHECK-SCEV:   -->  {(trunc i32 %wide2 to i16),+,1}<%loop.inner> U: full-set S: full-set           Exits: (-1 + (700 umax (1 + (trunc i32 %wide2 to i16))))               LoopDispositions: { %loop.inner: Computable, %loop.inner.ph: Variant }
+; CHECK-SCEV:   -->  {(trunc i32 %wide2 to i16),+,1}<nw><%loop.inner> U: full-set S: full-set           Exits: (-1 + (700 umax (1 + (trunc i32 %wide2 to i16))))               LoopDispositions: { %loop.inner: Computable, %loop.inner.ph: Variant }
 
 
 @offset = external dso_local global i32, align 1

diff  --git a/llvm/test/Transforms/LoopVersioning/lcssa.ll b/llvm/test/Transforms/LoopVersioning/lcssa.ll
index ee14f693abd7a..4b51c21257243 100644
--- a/llvm/test/Transforms/LoopVersioning/lcssa.ll
+++ b/llvm/test/Transforms/LoopVersioning/lcssa.ll
@@ -56,7 +56,6 @@ define void @fill_no_null_opt(i8** %ls1.20, i8** %ls2.21, i8* %cse3.22) #0 {
 ; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP]], [[SCEVGEP2]]
 ; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[LS2_21_PROMOTED]], [[SCEVGEP1]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i8, i8* [[LS1_20_PROMOTED]], i64 -1
 ; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label %bb1.ph.lver.orig, label %bb1.ph
 ; CHECK:       bb1.ph.lver.orig:
 ;