[llvm] SCEV: teach isImpliedViaOperations about samesign (PR #124270)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 02:10:59 PST 2025
dtcxzyw wrote:
> I'm confused about where the regression is coming from because the output from `opt -passes='print<scalar-evolution>'` is identical (?)
Further reduced case:
```
; bin/opt -passes="print<scalar-evolution>,indvars" reduced.ll -S
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Reduced reproducer fed to `opt` (see the command line at the top of this
; snippet). The function contains two nested loops:
;   * outer loop %.lr.ph4: %.03.in starts at %0 and is decremented by 1 on
;     each iteration; the loop is only entered when %0 > 0 (signed).
;   * inner loop %.lr.ph: %.081 counts up from 0 and keeps iterating while
;     %.081 + 1 < %.03 (signed compare).
; The inner loop is guarded by an `icmp samesign ugt`, while the outer latch
; uses a plain `icmp sgt` on the same operands.
define i32 @QRsol(i32 %0, ptr nocapture writeonly %1) local_unnamed_addr {
; Skip both loops entirely unless %0 is strictly positive.
%3 = icmp sgt i32 %0, 0
br i1 %3, label %.lr.ph4.preheader, label %._crit_edge
.lr.ph4.preheader: ; preds = %2
br label %.lr.ph4
.loopexit.loopexit: ; preds = %.lr.ph
br label %.loopexit
.loopexit: ; preds = %.loopexit.loopexit, %.lr.ph4
; Outer-loop latch: continue while the pre-decrement counter is > 1 (signed).
%4 = icmp sgt i32 %.03.in, 1
br i1 %4, label %.lr.ph4, label %._crit_edge.loopexit
.lr.ph4: ; preds = %.lr.ph4.preheader, %.loopexit
; Outer-loop header: %.03.in is the down-counting induction variable and
; %.03 is its value after the decrement.
%.03.in = phi i32 [ %.03, %.loopexit ], [ %0, %.lr.ph4.preheader ]
%.03 = add nsw i32 %.03.in, -1
%5 = zext nneg i32 %.03 to i64
%6 = getelementptr double, ptr null, i64 %5
store double poison, ptr %6, align 8
; Inner-loop guard: same operands as the latch compare %4, but expressed as
; an unsigned compare carrying the `samesign` flag.
%7 = icmp samesign ugt i32 %.03.in, 1
br i1 %7, label %.lr.ph.preheader, label %.loopexit
.lr.ph.preheader: ; preds = %.lr.ph4
br label %.lr.ph
.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph
; Inner loop: store 1.0 to %1[%.081] for %.081 = 0, 1, ... and exit once
; %.081 + 1 is no longer signed-less-than %.03.
%.081 = phi i32 [ %10, %.lr.ph ], [ 0, %.lr.ph.preheader ]
%8 = zext nneg i32 %.081 to i64
%9 = getelementptr double, ptr %1, i64 %8
store double 1.000000e+00, ptr %9, align 8
%10 = add nuw nsw i32 %.081, 1
%11 = icmp slt i32 %10, %.03
br i1 %11, label %.lr.ph, label %.loopexit.loopexit
._crit_edge.loopexit: ; preds = %.loopexit
br label %._crit_edge
._crit_edge: ; preds = %._crit_edge.loopexit, %2
ret i32 0
}
```
Before (without this patch; SCEV analysis output followed by the IR after indvars):
```
Classifying expressions for: @QRsol
%.03.in = phi i32 [ %.03, %.loopexit ], [ %0, %.lr.ph4.preheader ]
--> {%0,+,-1}<nsw><%.lr.ph4> U: full-set S: full-set Exits: 1 LoopDispositions: { %.lr.ph4: Computable, %.lr.ph: Invariant }
%.03 = add nsw i32 %.03.in, -1
--> {(-1 + %0),+,-1}<nsw><%.lr.ph4> U: full-set S: full-set Exits: 0 LoopDispositions: { %.lr.ph4: Computable, %.lr.ph: Invariant }
%5 = zext nneg i32 %.03 to i64
--> {(zext i32 (-1 + %0) to i64),+,-1}<nsw><%.lr.ph4> U: [-2147483646,4294967296) S: [-2147483646,4294967296) Exits: 0 LoopDispositions: { %.lr.ph4: Computable, %.lr.ph: Invariant }
%6 = getelementptr double, ptr null, i64 %5
--> {((8 * (zext i32 (-1 + %0) to i64))<nuw><nsw> + null),+,-8}<nw><%.lr.ph4> U: [0,-7) S: [-17179869168,34359738361) Exits: null LoopDispositions: { %.lr.ph4: Computable, %.lr.ph: Invariant }
%.081 = phi i32 [ %10, %.lr.ph ], [ 0, %.lr.ph.preheader ]
--> {0,+,1}<nuw><nsw><%.lr.ph> U: [0,2147483647) S: [0,2147483647) Exits: (-1 + (1 smax {(-1 + %0),+,-1}<nsw><%.lr.ph4>))<nsw> LoopDispositions: { %.lr.ph: Computable, %.lr.ph4: Variant }
%8 = zext nneg i32 %.081 to i64
--> {0,+,1}<nuw><nsw><%.lr.ph> U: [0,2147483647) S: [0,2147483647) Exits: (zext i32 (-1 + (1 smax {(-1 + %0),+,-1}<nsw><%.lr.ph4>))<nsw> to i64) LoopDispositions: { %.lr.ph: Computable, %.lr.ph4: Variant }
%9 = getelementptr double, ptr %1, i64 %8
--> {%1,+,8}<nw><%.lr.ph> U: full-set S: full-set Exits: ((8 * (zext i32 (-1 + (1 smax {(-1 + %0),+,-1}<nsw><%.lr.ph4>))<nsw> to i64))<nuw><nsw> + %1) LoopDispositions: { %.lr.ph: Computable, %.lr.ph4: Variant }
%10 = add nuw nsw i32 %.081, 1
--> {1,+,1}<nuw><nsw><%.lr.ph> U: [1,-2147483648) S: [1,-2147483648) Exits: (1 smax {(-1 + %0),+,-1}<nsw><%.lr.ph4>) LoopDispositions: { %.lr.ph: Computable, %.lr.ph4: Variant }
Determining loop execution counts for: @QRsol
Loop %.lr.ph: backedge-taken count is (-1 + (1 smax {(-1 + %0),+,-1}<nsw><%.lr.ph4>))<nsw>
Loop %.lr.ph: constant max backedge-taken count is i32 2147483646
Loop %.lr.ph: symbolic max backedge-taken count is (-1 + (1 smax {(-1 + %0),+,-1}<nsw><%.lr.ph4>))<nsw>
Loop %.lr.ph: Trip multiple is 1
Loop %.lr.ph4: backedge-taken count is (-1 + %0)
Loop %.lr.ph4: constant max backedge-taken count is i32 2147483646
Loop %.lr.ph4: symbolic max backedge-taken count is (-1 + %0)
Loop %.lr.ph4: Trip multiple is 1
; ModuleID = 'test.ll'
source_filename = "test.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Output of the command above for the "Before" case. Both loop counters have
; been widened to i64: the outer loop carries a single induction variable
; (%indvars.iv2) and the inner loop keeps its signed `slt` exit test.
define i32 @QRsol(i32 %0, ptr nocapture writeonly %1) local_unnamed_addr {
%3 = icmp sgt i32 %0, 0
br i1 %3, label %.lr.ph4.preheader, label %._crit_edge
.lr.ph4.preheader: ; preds = %2
; Start value of the widened outer induction variable: sign-extended %0.
%4 = sext i32 %0 to i64
br label %.lr.ph4
.loopexit.loopexit: ; preds = %.lr.ph
br label %.loopexit
.loopexit: ; preds = %.lr.ph4, %.loopexit.loopexit
%5 = icmp sgt i64 %indvars.iv2, 1
br i1 %5, label %.lr.ph4, label %._crit_edge.loopexit
.lr.ph4: ; preds = %.loopexit, %.lr.ph4.preheader
; Single i64 outer induction variable; its decremented value
; %indvars.iv.next3 feeds both the GEP below and the inner-loop bound.
%indvars.iv2 = phi i64 [ %indvars.iv.next3, %.loopexit ], [ %4, %.lr.ph4.preheader ]
%indvars.iv.next3 = add nsw i64 %indvars.iv2, -1
%6 = getelementptr double, ptr null, i64 %indvars.iv.next3
store double poison, ptr %6, align 8
%7 = icmp samesign ugt i64 %indvars.iv2, 1
br i1 %7, label %.lr.ph.preheader, label %.loopexit
.lr.ph.preheader: ; preds = %.lr.ph4
br label %.lr.ph
.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
%8 = getelementptr double, ptr %1, i64 %indvars.iv
store double 1.000000e+00, ptr %8, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Inner exit test is still a signed less-than against the outer counter.
%9 = icmp slt i64 %indvars.iv.next, %indvars.iv.next3
br i1 %9, label %.lr.ph, label %.loopexit.loopexit
._crit_edge.loopexit: ; preds = %.loopexit
br label %._crit_edge
._crit_edge: ; preds = %._crit_edge.loopexit, %2
ret i32 0
}
```
After (with this patch applied; SCEV analysis output followed by the IR after indvars):
```
Printing analysis 'Scalar Evolution Analysis' for function 'QRsol':
Classifying expressions for: @QRsol
%.03.in = phi i32 [ %.03, %.loopexit ], [ %0, %.lr.ph4.preheader ]
--> {%0,+,-1}<nsw><%.lr.ph4> U: full-set S: full-set Exits: 1 LoopDispositions: { %.lr.ph4: Computable, %.lr.ph: Invariant }
%.03 = add nsw i32 %.03.in, -1
--> {(-1 + %0),+,-1}<nsw><%.lr.ph4> U: full-set S: full-set Exits: 0 LoopDispositions: { %.lr.ph4: Computable, %.lr.ph: Invariant }
%5 = zext nneg i32 %.03 to i64
--> {(zext i32 (-1 + %0) to i64),+,-1}<nsw><%.lr.ph4> U: [-2147483646,4294967296) S: [-2147483646,4294967296) Exits: 0 LoopDispositions: { %.lr.ph4: Computable, %.lr.ph: Invariant }
%6 = getelementptr double, ptr null, i64 %5
--> {((8 * (zext i32 (-1 + %0) to i64))<nuw><nsw> + null),+,-8}<nw><%.lr.ph4> U: [0,-7) S: [-17179869168,34359738361) Exits: null LoopDispositions: { %.lr.ph4: Computable, %.lr.ph: Invariant }
%.081 = phi i32 [ %10, %.lr.ph ], [ 0, %.lr.ph.preheader ]
--> {0,+,1}<nuw><nsw><%.lr.ph> U: [0,2147483647) S: [0,2147483647) Exits: {(-2 + %0),+,-1}<nw><%.lr.ph4> LoopDispositions: { %.lr.ph: Computable, %.lr.ph4: Variant }
%8 = zext nneg i32 %.081 to i64
--> {0,+,1}<nuw><nsw><%.lr.ph> U: [0,2147483647) S: [0,2147483647) Exits: (zext i32 {(-2 + %0),+,-1}<nw><%.lr.ph4> to i64) LoopDispositions: { %.lr.ph: Computable, %.lr.ph4: Variant }
%9 = getelementptr double, ptr %1, i64 %8
--> {%1,+,8}<nw><%.lr.ph> U: full-set S: full-set Exits: ((8 * (zext i32 {(-2 + %0),+,-1}<nw><%.lr.ph4> to i64))<nuw><nsw> + %1) LoopDispositions: { %.lr.ph: Computable, %.lr.ph4: Variant }
%10 = add nuw nsw i32 %.081, 1
--> {1,+,1}<nuw><nsw><%.lr.ph> U: [1,-2147483648) S: [1,-2147483648) Exits: {(-1 + %0),+,-1}<nsw><%.lr.ph4> LoopDispositions: { %.lr.ph: Computable, %.lr.ph4: Variant }
Determining loop execution counts for: @QRsol
Loop %.lr.ph: backedge-taken count is {(-2 + %0),+,-1}<nw><%.lr.ph4>
Loop %.lr.ph: constant max backedge-taken count is i32 2147483646
Loop %.lr.ph: symbolic max backedge-taken count is {(-2 + %0),+,-1}<nw><%.lr.ph4>
Loop %.lr.ph: Trip multiple is 1
Loop %.lr.ph4: backedge-taken count is (-1 + %0)
Loop %.lr.ph4: constant max backedge-taken count is i32 2147483646
Loop %.lr.ph4: symbolic max backedge-taken count is (-1 + %0)
Loop %.lr.ph4: Trip multiple is 1
; ModuleID = 'test.ll'
source_filename = "test.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
; Output of the command above for the "After" case. Compared with the
; "Before" IR, the outer loop now carries two i64 induction variables
; (%indvars.iv6 and %indvars.iv4) and the inner exit test has become an
; `icmp ne` against %indvars.iv4.
; NOTE(review): the extra outer induction variable is presumably the
; regression under discussion -- confirm against the PR thread.
define i32 @QRsol(i32 %0, ptr nocapture writeonly %1) local_unnamed_addr {
%3 = icmp sgt i32 %0, 0
br i1 %3, label %.lr.ph4.preheader, label %._crit_edge
.lr.ph4.preheader: ; preds = %2
; Start values for the two outer induction variables:
; %5 = zext(%0 - 1) seeds %indvars.iv4, %6 = zext(%0) seeds %indvars.iv6.
%4 = add i32 %0, -1
%5 = zext i32 %4 to i64
%6 = zext i32 %0 to i64
br label %.lr.ph4
.loopexit.loopexit: ; preds = %.lr.ph
br label %.loopexit
.loopexit: ; preds = %.lr.ph4, %.loopexit.loopexit
%7 = icmp sgt i64 %indvars.iv6, 1
; The second outer induction variable is decremented here, in the latch.
%indvars.iv.next5 = add nsw i64 %indvars.iv4, -1
br i1 %7, label %.lr.ph4, label %._crit_edge.loopexit
.lr.ph4: ; preds = %.loopexit, %.lr.ph4.preheader
; Two outer-loop phis: %indvars.iv6 drives the GEP and both compares,
; %indvars.iv4 is used only as the inner loop's exit bound.
%indvars.iv6 = phi i64 [ %indvars.iv.next7, %.loopexit ], [ %6, %.lr.ph4.preheader ]
%indvars.iv4 = phi i64 [ %indvars.iv.next5, %.loopexit ], [ %5, %.lr.ph4.preheader ]
%indvars.iv.next7 = add nsw i64 %indvars.iv6, -1
%8 = getelementptr double, ptr null, i64 %indvars.iv.next7
store double poison, ptr %8, align 8
%9 = icmp samesign ugt i64 %indvars.iv6, 1
br i1 %9, label %.lr.ph.preheader, label %.loopexit
.lr.ph.preheader: ; preds = %.lr.ph4
br label %.lr.ph
.lr.ph: ; preds = %.lr.ph, %.lr.ph.preheader
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
%10 = getelementptr double, ptr %1, i64 %indvars.iv
store double 1.000000e+00, ptr %10, align 8
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Inner exit test rewritten to an equality check against %indvars.iv4.
%exitcond = icmp ne i64 %indvars.iv.next, %indvars.iv4
br i1 %exitcond, label %.lr.ph, label %.loopexit.loopexit
._crit_edge.loopexit: ; preds = %.loopexit
br label %._crit_edge
._crit_edge: ; preds = %._crit_edge.loopexit, %2
ret i32 0
}
```
https://github.com/llvm/llvm-project/pull/124270
More information about the llvm-commits
mailing list