[llvm] r273079 - [SCEV] Fix incorrect trip count computation
Christof Douma via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 1 09:07:07 PDT 2016
Hi Sanjoy.
This fix in scalar evolution has the unfortunate effect that loop unroll
is unable to fully unroll the following simple loop:
void foo(int x, int *p) {
x += 2; //any value bigger than 1
for (int i = x; i <= x+1; ++i) p[i] = i;
}
I executed it with: ./bin/clang --target=aarch64-arm-none-eabi -mllvm
-debug-only=loop-unroll -O3
Without your patch it knows the trip count, with your patch it does not. I
had expected the above loop would still be recognised as having a trip
count of 2 as it has no wrapping adds. Any reason why this is no longer
the case? This does regress our benchmarks.
Thanks,
Christof
On 18/06/2016 05:38, "llvm-commits on behalf of Sanjoy Das via
llvm-commits" <llvm-commits-bounces at lists.llvm.org on behalf of
llvm-commits at lists.llvm.org> wrote:
>Author: sanjoy
>Date: Fri Jun 17 23:38:31 2016
>New Revision: 273079
>
>URL: http://llvm.org/viewvc/llvm-project?rev=273079&view=rev
>Log:
>[SCEV] Fix incorrect trip count computation
>
>The way we elide max expressions when computing trip counts is incorrect
>-- it breaks cases like this:
>
>```
>static int wrapping_add(int a, int b) {
> return (int)((unsigned)a + (unsigned)b);
>}
>
>void test() {
> volatile int end_buf = 2147483548; // INT_MIN - 100
> int end = end_buf;
>
> unsigned counter = 0;
> for (int start = wrapping_add(end, 200); start < end; start++)
> counter++;
>
> print(counter);
>}
>```
>
>Note: the `NoWrap` variable that was being tested has little to do with
>the values flowing into the max expression; it is a property of the
>induction variable.
>
>test/Transforms/LoopUnroll/nsw-tripcount.ll was added to solely test
>functionality I'm reverting in this change, so I've deleted the test
>fully.
>
>Added:
> llvm/trunk/test/Analysis/ScalarEvolution/trip-count13.ll
>Removed:
> llvm/trunk/test/Transforms/LoopUnroll/nsw-tripcount.ll
>Modified:
> llvm/trunk/lib/Analysis/ScalarEvolution.cpp
> llvm/trunk/test/Analysis/ScalarEvolution/nsw.ll
>
>Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp
>URL:
>http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolutio
>n.cpp?rev=273079&r1=273078&r2=273079&view=diff
>==========================================================================
>====
>--- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original)
>+++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Fri Jun 17 23:38:31 2016
>@@ -8668,18 +8668,8 @@ ScalarEvolution::howManyLessThans(const
> : ICmpInst::ICMP_ULT;
> const SCEV *Start = IV->getStart();
> const SCEV *End = RHS;
>- if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride),
>RHS)) {
>- const SCEV *Diff = getMinusSCEV(RHS, Start);
>- // If we have NoWrap set, then we can assume that the increment won't
>- // overflow, in which case if RHS - Start is a constant, we don't
>need to
>- // do a max operation since we can just figure it out statically
>- if (NoWrap && isa<SCEVConstant>(Diff)) {
>- if (cast<SCEVConstant>(Diff)->getAPInt().isNegative())
>- End = Start;
>- } else
>- End = IsSigned ? getSMaxExpr(RHS, Start)
>- : getUMaxExpr(RHS, Start);
>- }
>+ if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride),
>RHS))
>+ End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
>
> const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride,
>false);
>
>@@ -8754,18 +8744,8 @@ ScalarEvolution::howManyGreaterThans(con
>
> const SCEV *Start = IV->getStart();
> const SCEV *End = RHS;
>- if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride),
>RHS)) {
>- const SCEV *Diff = getMinusSCEV(RHS, Start);
>- // If we have NoWrap set, then we can assume that the increment won't
>- // overflow, in which case if RHS - Start is a constant, we don't
>need to
>- // do a max operation since we can just figure it out statically
>- if (NoWrap && isa<SCEVConstant>(Diff)) {
>- if (!cast<SCEVConstant>(Diff)->getAPInt().isNegative())
>- End = Start;
>- } else
>- End = IsSigned ? getSMinExpr(RHS, Start)
>- : getUMinExpr(RHS, Start);
>- }
>+ if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS))
>+ End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start);
>
> const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride,
>false);
>
>
>Modified: llvm/trunk/test/Analysis/ScalarEvolution/nsw.ll
>URL:
>http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/ScalarEvoluti
>on/nsw.ll?rev=273079&r1=273078&r2=273079&view=diff
>==========================================================================
>====
>--- llvm/trunk/test/Analysis/ScalarEvolution/nsw.ll (original)
>+++ llvm/trunk/test/Analysis/ScalarEvolution/nsw.ll Fri Jun 17 23:38:31
>2016
>@@ -126,7 +126,7 @@ exit:
> }
>
> ; CHECK-LABEL: PR12375
>-; CHECK: --> {(4 + %arg)<nsw>,+,4}<nuw><%bb1>{{ U: [^ ]+ S: [^ ]+}}{{
>*}}Exits: (8 + %arg)<nsw>
>+; CHECK: --> {(4 + %arg)<nsw>,+,4}<nuw><%bb1>{{ U: [^ ]+ S: [^ ]+}}{{
>*}}Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg)<nsw> umax (8 +
>%arg)<nsw>)) /u 4)) + %arg)
> define i32 @PR12375(i32* readnone %arg) {
> bb:
> %tmp = getelementptr inbounds i32, i32* %arg, i64 2
>@@ -163,7 +163,7 @@ bb5:
> declare void @f(i32)
>
> ; CHECK-LABEL: nswnowrap
>-; CHECK: --> {(1 + %v)<nsw>,+,1}<nsw><%for.body>{{ U: [^ ]+ S: [^ ]+}}{{
>*}}Exits: (2 + %v)
>+; CHECK: --> {(1 + %v)<nsw>,+,1}<nsw><%for.body>{{ U: [^ ]+ S: [^ ]+}}{{
>*}}Exits: (1 + ((1 + %v)<nsw> smax %v))
> define void @nswnowrap(i32 %v, i32* %buf) {
> entry:
> %add = add nsw i32 %v, 1
>
>Added: llvm/trunk/test/Analysis/ScalarEvolution/trip-count13.ll
>URL:
>http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/ScalarEvoluti
>on/trip-count13.ll?rev=273079&view=auto
>==========================================================================
>====
>--- llvm/trunk/test/Analysis/ScalarEvolution/trip-count13.ll (added)
>+++ llvm/trunk/test/Analysis/ScalarEvolution/trip-count13.ll Fri Jun 17
>23:38:31 2016
>@@ -0,0 +1,81 @@
>+; RUN: opt -S -analyze -scalar-evolution < %s | FileCheck %s
>+
>+define void @u_0(i8 %rhs) {
>+; E.g.: %rhs = 255, %start = 99, backedge taken 156 times
>+entry:
>+ %start = add i8 %rhs, 100
>+ br label %loop
>+
>+loop:
>+ %iv = phi i8 [ %start, %entry ], [ %iv.inc, %loop ]
>+ %iv.inc = add nuw i8 %iv, 1 ;; Note: this never unsigned-wraps
>+ %iv.cmp = icmp ult i8 %iv, %rhs
>+ br i1 %iv.cmp, label %loop, label %leave
>+
>+; CHECK-LABEL: Determining loop execution counts for: @u_0
>+; CHECK-NEXT: Loop %loop: backedge-taken count is (-100 + (-1 * %rhs) +
>((100 + %rhs) umax %rhs))
>+; CHECK-NEXT: Loop %loop: max backedge-taken count is -1
>+
>+leave:
>+ ret void
>+}
>+
>+define void @u_1(i8 %start) {
>+entry:
>+; E.g.: %start = 99, %rhs = 255, backedge taken 156 times
>+ %rhs = add i8 %start, -100
>+ br label %loop
>+
>+loop:
>+ %iv = phi i8 [ %start, %entry ], [ %iv.inc, %loop ]
>+ %iv.inc = add nuw i8 %iv, 1 ;; Note: this never unsigned-wraps
>+ %iv.cmp = icmp ult i8 %iv, %rhs
>+ br i1 %iv.cmp, label %loop, label %leave
>+
>+; CHECK-LABEL: Determining loop execution counts for: @u_1
>+; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * %start) +
>((-100 + %start) umax %start))
>+; CHECK-NEXT: Loop %loop: max backedge-taken count is -1
>+
>+leave:
>+ ret void
>+}
>+
>+define void @s_0(i8 %rhs) {
>+entry:
>+; E.g.: %rhs = 127, %start = -29, backedge taken 156 times
>+ %start = add i8 %rhs, 100
>+ br label %loop
>+
>+loop:
>+ %iv = phi i8 [ %start, %entry ], [ %iv.inc, %loop ]
>+ %iv.inc = add nsw i8 %iv, 1 ;; Note: this never signed-wraps
>+ %iv.cmp = icmp slt i8 %iv, %rhs
>+ br i1 %iv.cmp, label %loop, label %leave
>+
>+; CHECK-LABEL: Determining loop execution counts for: @s_0
>+; CHECK-NEXT: Loop %loop: backedge-taken count is (-100 + (-1 * %rhs) +
>((100 + %rhs) smax %rhs))
>+; CHECK-NEXT: Loop %loop: max backedge-taken count is -1
>+
>+leave:
>+ ret void
>+}
>+
>+define void @s_1(i8 %start) {
>+entry:
>+; E.g.: start = -29, %rhs = 127, %backedge taken 156 times
>+ %rhs = add i8 %start, -100
>+ br label %loop
>+
>+loop:
>+ %iv = phi i8 [ %start, %entry ], [ %iv.inc, %loop ]
>+ %iv.inc = add nsw i8 %iv, 1
>+ %iv.cmp = icmp slt i8 %iv, %rhs
>+ br i1 %iv.cmp, label %loop, label %leave
>+
>+; CHECK-LABEL: Determining loop execution counts for: @s_1
>+; CHECK-NEXT: Loop %loop: backedge-taken count is ((-1 * %start) +
>((-100 + %start) smax %start))
>+; CHECK-NEXT: Loop %loop: max backedge-taken count is -1
>+
>+leave:
>+ ret void
>+}
>
>Removed: llvm/trunk/test/Transforms/LoopUnroll/nsw-tripcount.ll
>URL:
>http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/
>nsw-tripcount.ll?rev=273078&view=auto
>==========================================================================
>====
>--- llvm/trunk/test/Transforms/LoopUnroll/nsw-tripcount.ll (original)
>+++ llvm/trunk/test/Transforms/LoopUnroll/nsw-tripcount.ll (removed)
>@@ -1,32 +0,0 @@
>-; RUN: opt -loop-unroll -S %s | FileCheck %s
>-
>-; extern void f(int);
>-; void test1(int v) {
>-; for (int i=v; i<=v+1; ++i)
>-; f(i);
>-; }
>-;
>-; We can use the nsw information to see that the tripcount will be 2, so
>the
>-; loop should be unrolled as this is always beneficial
>-
>-declare void @f(i32)
>-
>-; CHECK-LABEL: @test1
>-define void @test1(i32 %v) {
>-entry:
>- %add = add nsw i32 %v, 1
>- br label %for.body
>-
>-for.body:
>- %i.04 = phi i32 [ %v, %entry ], [ %inc, %for.body ]
>- tail call void @f(i32 %i.04)
>- %inc = add nsw i32 %i.04, 1
>- %cmp = icmp slt i32 %i.04, %add
>- br i1 %cmp, label %for.body, label %for.end
>-
>-; CHECK: call void @f
>-; CHECK-NOT: br i1
>-; CHECK: call void @f
>-for.end:
>- ret void
>-}
>
>
>_______________________________________________
>llvm-commits mailing list
>llvm-commits at lists.llvm.org
>http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list