[llvm] r300446 - [LoopPeeling] Get rid of Phis that become invariant after N steps
Max Kazantsev via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 17 02:52:02 PDT 2017
Author: mkazantsev
Date: Mon Apr 17 04:52:02 2017
New Revision: 300446
URL: http://llvm.org/viewvc/llvm-project?rev=300446&view=rev
Log:
[LoopPeeling] Get rid of Phis that become invariant after N steps
This patch is a generalization of the improvement introduced in rL296898.
Previously, we were able to peel one iteration of a loop to get rid of a Phi that becomes
an invariant on the 2nd iteration. In the more general case, if a Phi becomes invariant after
N iterations, we can peel N times and turn it into an invariant.
In order to do this, for every Phi in the loop's header we define the Invariant Depth value,
which is calculated as follows:
Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
If %y is a loop invariant, then Depth(%x) = 1.
If %y is a Phi from the loop header, Depth(%x) = Depth(%y) + 1.
Otherwise, Depth(%x) is infinite.
Notice that if we peel a loop, all Phis with Depth = 1 become invariants,
and all other Phis with finite depth decrease the depth by 1.
Thus, peeling N first iterations allows us to turn all Phis with Depth <= N
into invariants.
Reviewers: reames, apilipenko, mkuper, skatkov, anna, sanjoy
Reviewed By: sanjoy
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D31613
Modified:
llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp
llvm/trunk/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
Modified: llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp?rev=300446&r1=300445&r2=300446&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp Mon Apr 17 04:52:02 2017
@@ -46,6 +46,11 @@ static cl::opt<unsigned> UnrollForcePeel
"unroll-force-peel-count", cl::init(0), cl::Hidden,
cl::desc("Force a peel count regardless of profiling information."));
+// Designates that a Phi is estimated to become invariant after an "infinite"
+// number of loop iterations (i.e. only may become an invariant if the loop is
+// fully unrolled).
+static const unsigned InfiniteIterationsToInvariance = UINT_MAX;
+
// Check whether we are capable of peeling this loop.
static bool canPeel(Loop *L) {
// Make sure the loop is in simplified form
@@ -66,10 +71,62 @@ static bool canPeel(Loop *L) {
return true;
}
+// This function calculates the number of iterations after which the given Phi
+// becomes an invariant. The pre-calculated values are memoized in the map. The
+// function (shortcut is I) is calculated according to the following definition:
+// Given %x = phi <Inputs from above the loop>, ..., [%y, %back.edge].
+// If %y is a loop invariant, then I(%x) = 1.
+// If %y is a Phi from the loop header, I(%x) = I(%y) + 1.
+// Otherwise, I(%x) is infinite.
+// TODO: Actually if %y is an expression that depends only on Phi %z and some
+// loop invariants, we can estimate I(%x) = I(%z) + 1. The example
+// looks like:
+// %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
+// %y = phi(0, 5),
+// %a = %y + 1.
+static unsigned calculateIterationsToInvariance(
+ PHINode *Phi, Loop *L, BasicBlock *BackEdge,
+ SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
+ assert(Phi->getParent() == L->getHeader() &&
+ "Non-loop Phi should not be checked for turning into invariant.");
+ assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
+ // If we already know the answer, take it from the map.
+ auto I = IterationsToInvariance.find(Phi);
+ if (I != IterationsToInvariance.end())
+ return I->second;
+
+ // Otherwise we need to analyze the input from the back edge.
+ Value *Input = Phi->getIncomingValueForBlock(BackEdge);
+ // Put infinity into the map to avoid infinite recursion on cyclic Phis. Such
+ // cycles can never stop on an invariant.
+ IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
+ unsigned ToInvariance = InfiniteIterationsToInvariance;
+
+ if (L->isLoopInvariant(Input))
+ ToInvariance = 1u;
+ else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
+ // Only consider Phis in header block.
+ if (IncPhi->getParent() != L->getHeader())
+ return InfiniteIterationsToInvariance;
+ // If the input becomes an invariant after X iterations, then our Phi
+ // becomes an invariant after X + 1 iterations.
+ unsigned InputToInvariance = calculateIterationsToInvariance(
+ IncPhi, L, BackEdge, IterationsToInvariance);
+ if (InputToInvariance != InfiniteIterationsToInvariance)
+ ToInvariance = InputToInvariance + 1u;
+ }
+
+ // If we found that this Phi lies in an invariant chain, update the map.
+ if (ToInvariance != InfiniteIterationsToInvariance)
+ IterationsToInvariance[Phi] = ToInvariance;
+ return ToInvariance;
+}
+
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
unsigned &TripCount) {
+ assert(LoopSize > 0 && "Zero loop size is not allowed!");
UP.PeelCount = 0;
if (!canPeel(L))
return;
@@ -78,31 +135,37 @@ void llvm::computePeelCount(Loop *L, uns
if (!L->empty())
return;
- // Try to find a Phi node that has the same loop invariant as an input from
- // its only back edge. If there is such Phi, peeling 1 iteration from the
- // loop is profitable, because starting from 2nd iteration we will have an
- // invariant instead of this Phi.
+ // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
+ // iterations of the loop. For this we compute the number of iterations after
+ // which every Phi is guaranteed to become an invariant, and try to peel the
+ // maximum number of iterations among these values, thus turning all those
+ // Phis into invariants.
// First, check that we can peel at least one iteration.
if (2 * LoopSize <= UP.Threshold && UnrollPeelMaxCount > 0) {
+ // Store the pre-calculated values here.
+ SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
+ // Now go through all Phis to calculate the number of iterations they
+ // need to become invariants.
+ unsigned DesiredPeelCount = 0;
BasicBlock *BackEdge = L->getLoopLatch();
assert(BackEdge && "Loop is not in simplified form?");
- BasicBlock *Header = L->getHeader();
- // Iterate over Phis to find one with invariant input on back edge.
- bool FoundCandidate = false;
- PHINode *Phi;
- for (auto BI = Header->begin(); isa<PHINode>(&*BI); ++BI) {
- Phi = cast<PHINode>(&*BI);
- Value *Input = Phi->getIncomingValueForBlock(BackEdge);
- if (L->isLoopInvariant(Input)) {
- FoundCandidate = true;
- break;
- }
+ for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
+ PHINode *Phi = cast<PHINode>(&*BI);
+ unsigned ToInvariance = calculateIterationsToInvariance(
+ Phi, L, BackEdge, IterationsToInvariance);
+ if (ToInvariance != InfiniteIterationsToInvariance)
+ DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
}
- if (FoundCandidate) {
- DEBUG(dbgs() << "Peel one iteration to get rid of " << *Phi
- << " because starting from 2nd iteration it is always"
- << " an invariant\n");
- UP.PeelCount = 1;
+ if (DesiredPeelCount > 0) {
+ // Pay respect to limitations implied by loop size and the max peel count.
+ unsigned MaxPeelCount = UnrollPeelMaxCount;
+ MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);
+ DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
+ // Consider max peel count limitation.
+ assert(DesiredPeelCount > 0 && "Wrong loop size estimation?");
+ DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn"
+ << " some Phis into invariants.\n");
+ UP.PeelCount = DesiredPeelCount;
return;
}
}
Modified: llvm/trunk/test/Transforms/LoopUnroll/peel-loop-not-forced.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/peel-loop-not-forced.ll?rev=300446&r1=300445&r2=300446&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/peel-loop-not-forced.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/peel-loop-not-forced.ll Mon Apr 17 04:52:02 2017
@@ -1,4 +1,4 @@
-; RUN: opt < %s -S -loop-unroll -unroll-threshold=8 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=30 | FileCheck %s
define i32 @invariant_backedge_1(i32 %a, i32 %b) {
; CHECK-LABEL: @invariant_backedge_1
@@ -25,10 +25,112 @@ exit:
ret i32 %sum
}
-; Peeling should fail due to method size.
define i32 @invariant_backedge_2(i32 %a, i32 %b) {
+; This loop should be peeled twice because it has a Phi which becomes invariant
+; starting from 3rd iteration.
; CHECK-LABEL: @invariant_backedge_2
-; CHECK-NOT: loop.peel:
+; CHECK: loop.peel{{.*}}:
+; CHECK: loop.peel{{.*}}:
+; CHECK: %i = phi
+; CHECK: %sum = phi
+; CHECK-NOT: %half.inv = phi
+; CHECK-NOT: %plus = phi
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+ %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+ %plus = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+
+ %incsum = add i32 %sum, %plus
+ %inc = add i32 %i, 1
+ %cmp = icmp slt i32 %i, 1000
+
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %sum
+}
+
+define i32 @invariant_backedge_3(i32 %a, i32 %b) {
+; This loop should be peeled thrice because it has a Phi which becomes invariant
+; starting from 4th iteration.
+; CHECK-LABEL: @invariant_backedge_3
+; CHECK: loop.peel{{.*}}:
+; CHECK: loop.peel{{.*}}:
+; CHECK: loop.peel{{.*}}:
+; CHECK: %i = phi
+; CHECK: %sum = phi
+; CHECK-NOT: %half.inv = phi
+; CHECK-NOT: %half.inv.2 = phi
+; CHECK-NOT: %plus = phi
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+ %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+ %half.inv.2 = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+ %plus = phi i32 [ %a, %entry ], [ %half.inv.2, %loop ]
+
+ %incsum = add i32 %sum, %plus
+ %inc = add i32 %i, 1
+ %cmp = icmp slt i32 %i, 1000
+
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %sum
+}
+
+define i32 @invariant_backedge_limited_by_size(i32 %a, i32 %b) {
+; This loop should normally be peeled thrice because it has a Phi which becomes
+; invariant starting from 4th iteration, but the size of the loop only allows
+; us to peel twice because we are restricted to 30 instructions in resulting
+; code. Thus, the %plus Phi node should stay in the loop even though its
+; backedge input is an invariant.
+; CHECK-LABEL: @invariant_backedge_limited_by_size
+; CHECK: loop.peel{{.*}}:
+; CHECK: loop.peel{{.*}}:
+; CHECK: %i = phi
+; CHECK: %sum = phi
+; CHECK: %plus = phi i32 [ %a, {{.*}} ], [ %b, %loop ]
+; CHECK-NOT: %half.inv = phi
+; CHECK-NOT: %half.inv.2 = phi
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %sum = phi i32 [ 0, %entry ], [ %incsum, %loop ]
+ %half.inv = phi i32 [ %a, %entry ], [ %b, %loop ]
+ %half.inv.2 = phi i32 [ %a, %entry ], [ %half.inv, %loop ]
+ %plus = phi i32 [ %a, %entry ], [ %half.inv.2, %loop ]
+
+ %incsum = add i32 %sum, %plus
+ %inc = add i32 %i, 1
+ %cmp = icmp slt i32 %i, 1000
+
+ %incsum2 = add i32 %incsum, %plus
+ %incsum3 = add i32 %incsum, %plus
+ %incsum4 = add i32 %incsum, %plus
+ %incsum5 = add i32 %incsum, %plus
+ %incsum6 = add i32 %incsum, %plus
+ %incsum7 = add i32 %incsum, %plus
+
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %sum
+}
+
+; Peeling should fail due to method size.
+define i32 @invariant_backedge_negative(i32 %a, i32 %b) {
+; CHECK-LABEL: @invariant_backedge_negative
+; CHECK-NOT: loop.peel{{.*}}:
; CHECK: loop:
; CHECK: %i = phi
; CHECK: %sum = phi
@@ -43,6 +145,47 @@ loop:
%incsum = add i32 %sum, %plus
%incsum2 = add i32 %incsum, %plus
+ %incsum3 = add i32 %incsum, %plus
+ %incsum4 = add i32 %incsum, %plus
+ %incsum5 = add i32 %incsum, %plus
+ %incsum6 = add i32 %incsum, %plus
+ %incsum7 = add i32 %incsum, %plus
+ %incsum8 = add i32 %incsum, %plus
+ %incsum9 = add i32 %incsum, %plus
+ %incsum10 = add i32 %incsum, %plus
+ %incsum11 = add i32 %incsum, %plus
+ %incsum12 = add i32 %incsum, %plus
+ %incsum13 = add i32 %incsum, %plus
+ %incsum14 = add i32 %incsum, %plus
+ %incsum15 = add i32 %incsum, %plus
+ %inc = add i32 %i, 1
+ %cmp = icmp slt i32 %i, 1000
+
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i32 %sum
+}
+
+define i32 @cycled_phis(i32 %a, i32 %b) {
+; Make sure that we do not crash working with cycled Phis and don't peel it.
+; TODO: Actually this loop should be partially unrolled with factor 2.
+; CHECK-LABEL: @cycled_phis
+; CHECK-NOT: loop.peel{{.*}}:
+; CHECK: loop:
+; CHECK: %i = phi
+; CHECK: %phi.a = phi
+; CHECK: %phi.b = phi
+; CHECK: %sum = phi
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %phi.a = phi i32 [ %a, %entry ], [ %phi.b, %loop ]
+ %phi.b = phi i32 [ %b, %entry ], [ %phi.a, %loop ]
+ %sum = phi i32 [ 0, %entry], [ %incsum, %loop ]
+ %incsum = add i32 %sum, %phi.a
%inc = add i32 %i, 1
%cmp = icmp slt i32 %i, 1000
More information about the llvm-commits
mailing list