[llvm] b14be1e - [SCEV] Use object size for globals to sharpen ranges.

Sat Apr 29 13:33:51 PDT 2023

Author: Florian Hahn
Date: 2023-04-29T21:33:30+01:00
New Revision: b14be1e7c0530c3c82e965104f160a4b357f91ed

URL: https://github.com/llvm/llvm-project/commit/b14be1e7c0530c3c82e965104f160a4b357f91ed
DIFF: https://github.com/llvm/llvm-project/commit/b14be1e7c0530c3c82e965104f160a4b357f91ed.diff

LOG: [SCEV] Use object size for globals to sharpen ranges.

The highest address the object can start is ObjSize bytes before the
end (unsigned max value). If this value is not a multiple of the
alignment, the last possible start value is the next lowest multiple
of the alignment. Note: The computations cannot overflow,
because if they would there's no possible start address for the
object.

At the moment, this is limited to GlobalVariables, because I could not
find a API similar to getObjectSize to also get the alignment of the
object. With such an API, this can be generalized to general addresses.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D149483

Added: 
    

Modified: 
    llvm/lib/Analysis/ScalarEvolution.cpp
    llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll
    llvm/test/Analysis/ScalarEvolution/load.ll
    llvm/test/Analysis/ScalarEvolution/ptrtoint-global.ll
    llvm/test/Transforms/IndVarSimplify/ptrtoint-global-cmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 641bf7b7f8d17..306090f3eb11c 100644

--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -76,6 +76,7 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -6773,6 +6774,31 @@ const ConstantRange &ScalarEvolution::getRangeRef(
                         APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1),
           RangeType);
 
+    if (U->getType()->isPointerTy() && SignHint == HINT_RANGE_UNSIGNED) {
+      // Strengthen the range if the underlying IR value is a global using the
+      // size of the global.
+      ObjectSizeOpts Opts;
+      Opts.RoundToAlign = false;
+      Opts.NullIsUnknownSize = true;
+      uint64_t ObjSize;
+      auto *GV = dyn_cast<GlobalVariable>(U->getValue());
+      if (GV && getObjectSize(U->getValue(), ObjSize, DL, &TLI, Opts) &&
+          ObjSize > 1) {
+        // The highest address the object can start is ObjSize bytes before the
+        // end (unsigned max value). If this value is not a multiple of the
+        // alignment, the last possible start value is the next lowest multiple
+        // of the alignment. Note: The computations below cannot overflow,
+        // because if they would there's no possible start address for the
+        // object.
+        APInt MaxVal = APInt::getMaxValue(BitWidth) - APInt(BitWidth, ObjSize);
+        uint64_t Align = GV->getAlign().valueOrOne().value();
+        uint64_t Rem = MaxVal.urem(Align);
+        MaxVal -= APInt(BitWidth, Rem);
+        ConservativeResult = ConservativeResult.intersectWith(
+            {ConservativeResult.getUnsignedMin(), MaxVal + 1}, RangeType);
+      }
+    }
+
     // A range of Phi is a subset of union of all ranges of its input.
     if (PHINode *Phi = dyn_cast<PHINode>(U->getValue())) {
       // Make sure that we do not run over cycled Phis.

diff  --git a/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll b/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll
index 98b832dedacea..825b4abb5e8c5 100644
--- a/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll
+++ b/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll
@@ -21,7 +21,7 @@ define dso_local i32 @f() {
 ; CHECK-NEXT:    %idxprom20 = zext i32 %storemerge1921 to i64
 ; CHECK-NEXT:    --> (zext i32 {3,+,-1}<nsw><%for.cond6> to i64) U: [3,4) S: [3,4) Exits: <<Unknown>> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant }
 ; CHECK-NEXT:    %arrayidx7 = getelementptr inbounds [1 x [4 x i16]], ptr @__const.f.g, i64 0, i64 0, i64 %idxprom20
-; CHECK-NEXT:    --> ((2 * (zext i32 {3,+,-1}<nsw><%for.cond6> to i64))<nuw><nsw> + @__const.f.g)<nuw> U: [6,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant }
+; CHECK-NEXT:    --> ((2 * (zext i32 {3,+,-1}<nsw><%for.cond6> to i64))<nuw><nsw> + @__const.f.g)<nuw> U: [6,-3) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant }
 ; CHECK-NEXT:    %i = load i16, ptr %arrayidx7, align 2
 ; CHECK-NEXT:    --> %i U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.cond6: Variant, %outer.loop: Variant }
 ; CHECK-NEXT:    %storemerge1822.lcssa.ph = phi i32 [ 0, %for.cond6 ]
@@ -45,7 +45,7 @@ define dso_local i32 @f() {
 ; CHECK-NEXT:    %idxprom20.3 = zext i32 %storemerge1921.3 to i64
 ; CHECK-NEXT:    --> (zext i32 {3,+,-1}<nsw><%inner.loop> to i64) U: [3,4) S: [3,4) Exits: <<Unknown>> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant }
 ; CHECK-NEXT:    %arrayidx7.3 = getelementptr inbounds [1 x [4 x i16]], ptr @__const.f.g, i64 0, i64 0, i64 %idxprom20.3
-; CHECK-NEXT:    --> ((2 * (zext i32 {3,+,-1}<nsw><%inner.loop> to i64))<nuw><nsw> + @__const.f.g)<nuw> U: [6,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant }
+; CHECK-NEXT:    --> ((2 * (zext i32 {3,+,-1}<nsw><%inner.loop> to i64))<nuw><nsw> + @__const.f.g)<nuw> U: [6,-3) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant }
 ; CHECK-NEXT:    %i7 = load i16, ptr %arrayidx7.3, align 2
 ; CHECK-NEXT:    --> %i7 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %inner.loop: Variant, %outer.loop: Variant }
 ; CHECK-NEXT:    %i8 = load volatile i32, ptr @b, align 4

diff  --git a/llvm/test/Analysis/ScalarEvolution/load.ll b/llvm/test/Analysis/ScalarEvolution/load.ll
index 5ba3dff9d9df1..b95ad89f2def9 100644
--- a/llvm/test/Analysis/ScalarEvolution/load.ll
+++ b/llvm/test/Analysis/ScalarEvolution/load.ll
@@ -16,11 +16,11 @@ define i32 @test1() nounwind readnone {
 ; CHECK-NEXT:    %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
 ; CHECK-NEXT:    --> {0,+,1}<nuw><nsw><%for.body> U: [0,50) S: [0,50) Exits: 49 LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %arrayidx = getelementptr inbounds [50 x i32], ptr @arr1, i32 0, i32 %i.03
-; CHECK-NEXT:    --> {@arr1,+,4}<nuw><%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (196 + @arr1) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {@arr1,+,4}<nuw><%for.body> U: [0,-7) S: [-2147483648,2147483645) Exits: (196 + @arr1)<nuw> LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %0 = load i32, ptr %arrayidx, align 4
 ; CHECK-NEXT:    --> %0 U: full-set S: full-set Exits: 50 LoopDispositions: { %for.body: Variant }
 ; CHECK-NEXT:    %arrayidx1 = getelementptr inbounds [50 x i32], ptr @arr2, i32 0, i32 %i.03
-; CHECK-NEXT:    --> {@arr2,+,4}<nuw><%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (196 + @arr2) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT:    --> {@arr2,+,4}<nuw><%for.body> U: [0,-7) S: [-2147483648,2147483645) Exits: (196 + @arr2)<nuw> LoopDispositions: { %for.body: Computable }
 ; CHECK-NEXT:    %1 = load i32, ptr %arrayidx1, align 4
 ; CHECK-NEXT:    --> %1 U: full-set S: full-set Exits: 0 LoopDispositions: { %for.body: Variant }
 ; CHECK-NEXT:    %add = add i32 %0, %sum.04

diff  --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint-global.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint-global.ll
index a76acbb77ac7b..dc73e28db9527 100644
--- a/llvm/test/Analysis/ScalarEvolution/ptrtoint-global.ll
+++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint-global.ll
@@ -10,7 +10,7 @@ define i64 @ptrtoint_align_2_size_4_add_5() {
 ; CHECK-LABEL: 'ptrtoint_align_2_size_4_add_5'
 ; CHECK-NEXT:  Classifying expressions for: @ptrtoint_align_2_size_4_add_5
 ; CHECK-NEXT:    %add = add i64 ptrtoint (ptr @glob.i32.align2 to i64), 5
-; CHECK-NEXT:    --> (5 + (ptrtoint ptr @glob.i32.align2 to i64)) U: [5,4) S: [-9223372036854775803,-9223372036854775804)
+; CHECK-NEXT:    --> (5 + (ptrtoint ptr @glob.i32.align2 to i64))<nuw> U: [5,0) S: [5,0)
 ; CHECK-NEXT:  Determining loop execution counts for: @ptrtoint_align_2_size_4_add_5
 ;
 entry:
@@ -82,7 +82,7 @@ define i64 @ptrtoint_align_16_size_16_add_16() {
 ; CHECK-LABEL: 'ptrtoint_align_16_size_16_add_16'
 ; CHECK-NEXT:  Classifying expressions for: @ptrtoint_align_16_size_16_add_16
 ; CHECK-NEXT:    %add = add i64 ptrtoint (ptr @array4xi32 to i64), 16
-; CHECK-NEXT:    --> (16 + (ptrtoint ptr @array4xi32 to i64)) U: [0,-15) S: [-9223372036854775808,9223372036854775793)
+; CHECK-NEXT:    --> (16 + (ptrtoint ptr @array4xi32 to i64))<nuw> U: [16,-15) S: [-9223372036854775808,9223372036854775793)
 ; CHECK-NEXT:  Determining loop execution counts for: @ptrtoint_align_16_size_16_add_16
 ;
 entry:
@@ -94,7 +94,7 @@ define i64 @ptrtoint_align_16_size_16_add_31() {
 ; CHECK-LABEL: 'ptrtoint_align_16_size_16_add_31'
 ; CHECK-NEXT:  Classifying expressions for: @ptrtoint_align_16_size_16_add_31
 ; CHECK-NEXT:    %add = add i64 ptrtoint (ptr @array4xi32 to i64), 31
-; CHECK-NEXT:    --> (31 + (ptrtoint ptr @array4xi32 to i64)) U: [31,16) S: [-9223372036854775777,-9223372036854775792)
+; CHECK-NEXT:    --> (31 + (ptrtoint ptr @array4xi32 to i64))<nuw> U: [31,0) S: [31,0)
 ; CHECK-NEXT:  Determining loop execution counts for: @ptrtoint_align_16_size_16_add_31
 ;
 entry:
@@ -118,7 +118,7 @@ define i64 @ptrtoint_align_16_size_16_add_33() {
 ; CHECK-LABEL: 'ptrtoint_align_16_size_16_add_33'
 ; CHECK-NEXT:  Classifying expressions for: @ptrtoint_align_16_size_16_add_33
 ; CHECK-NEXT:    %add = add i64 ptrtoint (ptr @array4xi32 to i64), 33
-; CHECK-NEXT:    --> (33 + (ptrtoint ptr @array4xi32 to i64)) U: [33,18) S: [-9223372036854775775,-9223372036854775790)
+; CHECK-NEXT:    --> (33 + (ptrtoint ptr @array4xi32 to i64)) U: [33,2) S: [-9223372036854775775,-9223372036854775790)
 ; CHECK-NEXT:  Determining loop execution counts for: @ptrtoint_align_16_size_16_add_33
 ;
 entry:
@@ -130,13 +130,13 @@ define i64 @ptrtoint_align_16_size_16_add_16_umax_sub() {
 ; CHECK-LABEL: 'ptrtoint_align_16_size_16_add_16_umax_sub'
 ; CHECK-NEXT:  Classifying expressions for: @ptrtoint_align_16_size_16_add_16_umax_sub
 ; CHECK-NEXT:    %add.16 = add i64 ptrtoint (ptr @array4xi32 to i64), 16
-; CHECK-NEXT:    --> (16 + (ptrtoint ptr @array4xi32 to i64)) U: [0,-15) S: [-9223372036854775808,9223372036854775793)
+; CHECK-NEXT:    --> (16 + (ptrtoint ptr @array4xi32 to i64))<nuw> U: [16,-15) S: [-9223372036854775808,9223372036854775793)
 ; CHECK-NEXT:    %umax = call i64 @llvm.umax.i64(i64 ptrtoint (ptr @array4xi32 to i64), i64 %add.16)
-; CHECK-NEXT:    --> ((16 + (ptrtoint ptr @array4xi32 to i64)) umax (ptrtoint ptr @array4xi32 to i64)) U: [0,-15) S: [-9223372036854775808,9223372036854775793)
+; CHECK-NEXT:    --> (16 + (ptrtoint ptr @array4xi32 to i64))<nuw> U: [16,-15) S: [-9223372036854775808,9223372036854775793)
 ; CHECK-NEXT:    %add = add i64 %umax, 16
-; CHECK-NEXT:    --> (16 + ((16 + (ptrtoint ptr @array4xi32 to i64)) umax (ptrtoint ptr @array4xi32 to i64))) U: [0,-15) S: [-9223372036854775808,9223372036854775793)
+; CHECK-NEXT:    --> (32 + (ptrtoint ptr @array4xi32 to i64)) U: [0,-15) S: [-9223372036854775808,9223372036854775793)
 ; CHECK-NEXT:    %sub = sub i64 %add, ptrtoint (ptr @array4xi32 to i64)
-; CHECK-NEXT:    --> (16 + (-1 * (ptrtoint ptr @array4xi32 to i64)) + ((16 + (ptrtoint ptr @array4xi32 to i64)) umax (ptrtoint ptr @array4xi32 to i64))) U: [0,-15) S: [-9223372036854775808,9223372036854775793)
+; CHECK-NEXT:    --> 32 U: [32,33) S: [32,33)
 ; CHECK-NEXT:  Determining loop execution counts for: @ptrtoint_align_16_size_16_add_16_umax_sub
 ;
 entry:
@@ -151,13 +151,13 @@ define i64 @ptrtoint_align_16_size_16_add_31_umax_sub() {
 ; CHECK-LABEL: 'ptrtoint_align_16_size_16_add_31_umax_sub'
 ; CHECK-NEXT:  Classifying expressions for: @ptrtoint_align_16_size_16_add_31_umax_sub
 ; CHECK-NEXT:    %add.31 = add i64 ptrtoint (ptr @array4xi32 to i64), 31
-; CHECK-NEXT:    --> (31 + (ptrtoint ptr @array4xi32 to i64)) U: [31,16) S: [-9223372036854775777,-9223372036854775792)
+; CHECK-NEXT:    --> (31 + (ptrtoint ptr @array4xi32 to i64))<nuw> U: [31,0) S: [31,0)
 ; CHECK-NEXT:    %umax = call i64 @llvm.umax.i64(i64 ptrtoint (ptr @array4xi32 to i64), i64 %add.31)
-; CHECK-NEXT:    --> ((31 + (ptrtoint ptr @array4xi32 to i64)) umax (ptrtoint ptr @array4xi32 to i64)) U: full-set S: full-set
+; CHECK-NEXT:    --> (31 + (ptrtoint ptr @array4xi32 to i64))<nuw> U: [31,0) S: [31,0)
 ; CHECK-NEXT:    %add = add i64 %umax, 16
-; CHECK-NEXT:    --> (16 + ((31 + (ptrtoint ptr @array4xi32 to i64)) umax (ptrtoint ptr @array4xi32 to i64))) U: full-set S: full-set
+; CHECK-NEXT:    --> (47 + (ptrtoint ptr @array4xi32 to i64)) U: [47,16) S: [-9223372036854775761,-9223372036854775776)
 ; CHECK-NEXT:    %sub = sub i64 %add, ptrtoint (ptr @array4xi32 to i64)
-; CHECK-NEXT:    --> (16 + (-1 * (ptrtoint ptr @array4xi32 to i64)) + ((31 + (ptrtoint ptr @array4xi32 to i64)) umax (ptrtoint ptr @array4xi32 to i64))) U: full-set S: full-set
+; CHECK-NEXT:    --> 47 U: [47,48) S: [47,48)
 ; CHECK-NEXT:  Determining loop execution counts for: @ptrtoint_align_16_size_16_add_31_umax_sub
 ;
 entry:

diff  --git a/llvm/test/Transforms/IndVarSimplify/ptrtoint-global-cmp.ll b/llvm/test/Transforms/IndVarSimplify/ptrtoint-global-cmp.ll
index 4d43f2a7299e8..e06d83753fe29 100644
--- a/llvm/test/Transforms/IndVarSimplify/ptrtoint-global-cmp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/ptrtoint-global-cmp.ll
@@ -6,25 +6,18 @@
 define i32 @test() {
 ; CHECK-LABEL: define i32 @test() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 ptrtoint (ptr @a to i64), i64 add (i64 ptrtoint (ptr @a to i64), i64 16))
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[UMAX]], 3
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 [[TMP0]], ptrtoint (ptr @a to i64)
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP1]], 2
-; CHECK-NEXT:    [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 4)
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[TMP2]], [[UMIN]]
 ; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; CHECK:       loop.header:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
 ; CHECK-NEXT:    [[RED:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RED_NEXT:%.*]], [[LOOP_LATCH]] ]
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i8 [[IV]] to i64
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr @a, i64 [[IDXPROM]]
-; CHECK-NEXT:    br i1 [[TMP3]], label [[LOOP_LATCH]], label [[EXIT_1:%.*]]
+; CHECK-NEXT:    br i1 false, label [[LOOP_LATCH]], label [[EXIT_1:%.*]]
 ; CHECK:       loop.latch:
 ; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP]], align 4
 ; CHECK-NEXT:    [[RED_NEXT]] = add nsw i32 [[L]], [[RED]]
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i8 [[IV]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[IV]], 4
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_HEADER]], label [[EXIT_2:%.*]]
+; CHECK-NEXT:    br i1 true, label [[LOOP_HEADER]], label [[EXIT_2:%.*]]
 ; CHECK:       exit.1:
 ; CHECK-NEXT:    ret i32 0
 ; CHECK:       exit.2: