[llvm] r356392 - [SCEV] Guard movement of insertion point for loop-invariants
Warren Ristow via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 18 11:52:35 PDT 2019
Author: wristow
Date: Mon Mar 18 11:52:35 2019
New Revision: 356392
URL: http://llvm.org/viewvc/llvm-project?rev=356392&view=rev
Log:
[SCEV] Guard movement of insertion point for loop-invariants
This reinstates r347934, along with a tweak to address a problem with
PHI node ordering that that commit created (or exposed). (That commit
was reverted at r348426, due to the PHI node issue.)
Original commit message:
r320789 suppressed moving the insertion point of SCEV expressions with
dev/rem operations to the loop header in non-loop-invariant situations.
This, and similar, hoisting is also unsafe in the loop-invariant case,
since there may be a guard against a zero denominator. This is an
adjustment to the fix of r320789 to suppress the movement even in the
loop-invariant case.
This fixes PR30806.
Differential Revision: https://reviews.llvm.org/D57428
Added:
llvm/trunk/test/Transforms/LoopVectorize/pr30806-phi-scev.ll
llvm/trunk/test/Transforms/LoopVectorize/pr30806.ll
Modified:
llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp
Modified: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp?rev=356392&r1=356391&r2=356392&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp (original)
+++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp Mon Mar 18 11:52:35 2019
@@ -1731,49 +1731,55 @@ Value *SCEVExpander::expand(const SCEV *
// Compute an insertion point for this SCEV object. Hoist the instructions
// as far out in the loop nest as possible.
Instruction *InsertPt = &*Builder.GetInsertPoint();
- for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
- L = L->getParentLoop())
- if (SE.isLoopInvariant(S, L)) {
- if (!L) break;
- if (BasicBlock *Preheader = L->getLoopPreheader())
- InsertPt = Preheader->getTerminator();
- else {
- // LSR sets the insertion point for AddRec start/step values to the
- // block start to simplify value reuse, even though it's an invalid
- // position. SCEVExpander must correct for this in all cases.
- InsertPt = &*L->getHeader()->getFirstInsertionPt();
- }
- } else {
- // We can move insertion point only if there is no div or rem operations
- // otherwise we are risky to move it over the check for zero denominator.
- auto SafeToHoist = [](const SCEV *S) {
- return !SCEVExprContains(S, [](const SCEV *S) {
- if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
- if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
- // Division by non-zero constants can be hoisted.
- return SC->getValue()->isZero();
- // All other divisions should not be moved as they may be
- // divisions by zero and should be kept within the
- // conditions of the surrounding loops that guard their
- // execution (see PR35406).
- return true;
- }
- return false;
- });
- };
- // If the SCEV is computable at this level, insert it into the header
- // after the PHIs (and after any other instructions that we've inserted
- // there) so that it is guaranteed to dominate any user inside the loop.
- if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) &&
- SafeToHoist(S))
- InsertPt = &*L->getHeader()->getFirstInsertionPt();
- while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
- (isInsertedInstruction(InsertPt) ||
- isa<DbgInfoIntrinsic>(InsertPt))) {
- InsertPt = &*std::next(InsertPt->getIterator());
+
+ // We can move insertion point only if there is no div or rem operations
+ // otherwise we are risky to move it over the check for zero denominator.
+ auto SafeToHoist = [](const SCEV *S) {
+ return !SCEVExprContains(S, [](const SCEV *S) {
+ if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
+ if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
+ // Division by non-zero constants can be hoisted.
+ return SC->getValue()->isZero();
+ // All other divisions should not be moved as they may be
+ // divisions by zero and should be kept within the
+ // conditions of the surrounding loops that guard their
+ // execution (see PR35406).
+ return true;
+ }
+ return false;
+ });
+ };
+ if (SafeToHoist(S)) {
+ for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
+ L = L->getParentLoop()) {
+ if (SE.isLoopInvariant(S, L)) {
+ if (!L) break;
+ if (BasicBlock *Preheader = L->getLoopPreheader())
+ InsertPt = Preheader->getTerminator();
+ else
+ // LSR sets the insertion point for AddRec start/step values to the
+ // block start to simplify value reuse, even though it's an invalid
+ // position. SCEVExpander must correct for this in all cases.
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ } else {
+ // If the SCEV is computable at this level, insert it into the header
+ // after the PHIs (and after any other instructions that we've inserted
+ // there) so that it is guaranteed to dominate any user inside the loop.
+ if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
+ InsertPt = &*L->getHeader()->getFirstInsertionPt();
+ while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
+ (isInsertedInstruction(InsertPt) ||
+ isa<DbgInfoIntrinsic>(InsertPt)))
+ InsertPt = &*std::next(InsertPt->getIterator());
+ break;
}
- break;
}
+ }
+
+ // IndVarSimplify sometimes sets the insertion point at the block start, even
+ // when there are PHIs at that point. We must correct for this.
+ if (isa<PHINode>(*InsertPt))
+ InsertPt = &*InsertPt->getParent()->getFirstInsertionPt();
// Check to see if we already expanded this here.
auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
Added: llvm/trunk/test/Transforms/LoopVectorize/pr30806-phi-scev.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr30806-phi-scev.ll?rev=356392&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr30806-phi-scev.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr30806-phi-scev.ll Mon Mar 18 11:52:35 2019
@@ -0,0 +1,66 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+; Produced from the test-case:
+;
+; extern void foo(char *, unsigned , unsigned *);
+; extern void bar(int *, long);
+; extern char *processBuf(char *);
+;
+; extern unsigned theSize;
+;
+; void foo(char *buf, unsigned denominator, unsigned *flag) {
+; int incr = (int) (theSize / denominator);
+; int inx = 0;
+; while (*flag) {
+; int itmp = inx + incr;
+; int i = (int) theSize;
+; bar(&i, (long) itmp);
+; buf = processBuf(buf);
+; inx = itmp;
+; }
+; }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+ at theSize = external local_unnamed_addr global i32, align 4
+
+define void @foo(i8* %buf, i32 %denominator, i32* %flag) local_unnamed_addr {
+entry:
+ %i = alloca i32, align 4
+ %0 = load i32, i32* @theSize, align 4
+ %div = udiv i32 %0, %denominator
+ %1 = load i32, i32* %flag, align 4
+ %tobool5 = icmp eq i32 %1, 0
+ br i1 %tobool5, label %while.end, label %while.body.lr.ph
+
+while.body.lr.ph: ; preds = %entry
+ %2 = bitcast i32* %i to i8*
+ br label %while.body
+
+while.body: ; preds = %while.body.lr.ph, %while.body
+; Check that there are two PHIs followed by a 'sext' in the same block, and that
+; the test does not crash.
+; CHECK: phi
+; CHECK-NEXT: phi
+; CHECK-NEXT: sext
+ %buf.addr.07 = phi i8* [ %buf, %while.body.lr.ph ], [ %call, %while.body ]
+ %inx.06 = phi i32 [ 0, %while.body.lr.ph ], [ %add, %while.body ]
+ %add = add nsw i32 %inx.06, %div
+ %3 = load i32, i32* @theSize, align 4
+ store i32 %3, i32* %i, align 4
+ %conv = sext i32 %add to i64
+ call void @bar(i32* nonnull %i, i64 %conv)
+ %call = call i8* @processBuf(i8* %buf.addr.07)
+ %4 = load i32, i32* %flag, align 4
+ %tobool = icmp eq i32 %4, 0
+ br i1 %tobool, label %while.end.loopexit, label %while.body
+
+while.end.loopexit: ; preds = %while.body
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ ret void
+}
+
+declare void @bar(i32*, i64) local_unnamed_addr
+declare i8* @processBuf(i8*) local_unnamed_addr
Added: llvm/trunk/test/Transforms/LoopVectorize/pr30806.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/pr30806.ll?rev=356392&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/pr30806.ll (added)
+++ llvm/trunk/test/Transforms/LoopVectorize/pr30806.ll Mon Mar 18 11:52:35 2019
@@ -0,0 +1,65 @@
+; RUN: opt -loop-vectorize -S < %s 2>&1 | FileCheck %s
+
+; Produced from test-case:
+;
+; void testGuardedInnerLoop(uint32_t *ptr, uint32_t denom, uint32_t numer, uint32_t outer_lim)
+; {
+; for(uint32_t outer_i = 0; outer_i < outer_lim; ++outer_i) {
+; if (denom > 0) {
+; const uint32_t lim = numer / denom;
+;
+; for (uint32_t i = 0; i < lim; ++i)
+; ptr[i] = 1;
+; }
+; }
+; }
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @testGuardedInnerLoop(i32* %ptr, i32 %denom, i32 %numer, i32 %outer_lim) {
+entry:
+ %cmp1 = icmp eq i32 %outer_lim, 0
+ br i1 %cmp1, label %exit, label %loop1.preheader
+
+; Verify that a 'udiv' does not appear between the 'loop1.preheader' label, and
+; whatever label comes next.
+loop1.preheader:
+; CHECK-LABEL: loop1.preheader:
+; CHECK-NOT: udiv
+; CHECK-LABEL: :
+ br label %loop1
+
+loop1:
+ %outer_i = phi i32 [ %inc1, %loop2.exit ], [ 0, %loop1.preheader ]
+ %0 = add i32 %denom, -1
+ %1 = icmp ult i32 %0, %numer
+ br i1 %1, label %loop2.preheader, label %loop2.exit
+
+; Verify that a 'udiv' does appear between the 'loop2.preheader' label, and
+; whatever label comes next.
+loop2.preheader:
+; CHECK-LABEL: loop2.preheader:
+; CHECK: udiv
+; CHECK-LABEL: :
+ %lim = udiv i32 %numer, %denom
+ %2 = zext i32 %lim to i64
+ br label %loop2
+
+loop2:
+ %indvar.loop2 = phi i64 [ 0, %loop2.preheader ], [ %indvar.loop2.next, %loop2 ]
+ %arrayidx = getelementptr inbounds i32, i32* %ptr, i64 %indvar.loop2
+ store i32 1, i32* %arrayidx, align 4
+ %indvar.loop2.next = add nuw nsw i64 %indvar.loop2, 1
+ %cmp2 = icmp ult i64 %indvar.loop2.next, %2
+ br i1 %cmp2, label %loop2, label %loop2.exit
+
+loop2.exit:
+ %inc1 = add nuw i32 %outer_i, 1
+ %exitcond = icmp eq i32 %inc1, %outer_lim
+ br i1 %exitcond, label %exit, label %loop1
+
+exit:
+ ret void
+}
More information about the llvm-commits
mailing list