[llvm] r296770 - The patch turns on epilogue unroll for loops with constant recurrence start.

Evgeny Stupachenko via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 2 09:38:46 PST 2017


Author: evstupac
Date: Thu Mar  2 11:38:46 2017
New Revision: 296770

URL: http://llvm.org/viewvc/llvm-project?rev=296770&view=rev
Log:
The patch turns on epilogue unroll for loops with constant recurrence start.
Summary:

Set unroll remainder to epilog if a loop contains a phi whose incoming value from the preheader is a constant:

  loop:
  pn = phi [Const, PreHeader], [pn.next, Latch]
  ...

Reviewer: hfinkel

Differential Revision: http://reviews.llvm.org/D27004

From: Evgeny Stupachenko <evstupac at gmail.com>

Modified:
    llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp
    llvm/trunk/test/Transforms/LoopUnroll/revisit.ll
    llvm/trunk/test/Transforms/LoopUnroll/runtime-loop5.ll
    llvm/trunk/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
    llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll

Modified: llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp?rev=296770&r1=296769&r2=296770&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp Thu Mar  2 11:38:46 2017
@@ -216,6 +216,45 @@ const Loop* llvm::addClonedBlockToLoopIn
   }
 }
 
+/// The function chooses which type of unroll (epilog or prolog) is more
+/// profitable.
+/// Epilog unroll is more profitable when there is a PHI that starts from a
+/// constant.  In this case epilog will leave the PHI starting from a constant,
+/// but prolog will convert it to non-constant.
+///
+/// loop:
+///   PN = PHI [I, Latch], [CI, PreHeader]
+///   I = foo(PN)
+///   ...
+///
+/// Epilog unroll case.
+/// loop:
+///   PN = PHI [I2, Latch], [CI, PreHeader]
+///   I1 = foo(PN)
+///   I2 = foo(I1)
+///   ...
+/// Prolog unroll case.
+///   NewPN = PHI [PrologI, Prolog], [CI, PreHeader]
+/// loop:
+///   PN = PHI [I2, Latch], [NewPN, PreHeader]
+///   I1 = foo(PN)
+///   I2 = foo(I1)
+///   ...
+///
+static bool isEpilogProfitable(Loop *L) {
+  BasicBlock *PreHeader = L->getLoopPreheader();
+  BasicBlock *Header = L->getHeader();
+  assert(PreHeader && Header);
+  for (Instruction &BBI : *Header) {
+    PHINode *PN = dyn_cast<PHINode>(&BBI);
+    if (!PN)
+      break;
+    if (isa<ConstantInt>(PN->getIncomingValueForBlock(PreHeader)))
+      return true;
+  }
+  return false;
+}
+
 /// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
 /// if unrolling was successful, or false if the loop was unmodified. Unrolling
 /// can only fail when the loop's latch block is not terminated by a conditional
@@ -359,9 +398,13 @@ bool llvm::UnrollLoop(Loop *L, unsigned
                "convergent operation.");
       });
 
+  bool EpilogProfitability =
+      UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog
+                                              : isEpilogProfitable(L);
+
   if (RuntimeTripCount && TripMultiple % Count != 0 &&
       !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
-                                  UnrollRuntimeEpilog, LI, SE, DT, 
+                                  EpilogProfitability, LI, SE, DT,
                                   PreserveLCSSA)) {
     if (Force)
       RuntimeTripCount = false;

Modified: llvm/trunk/test/Transforms/LoopUnroll/revisit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/revisit.ll?rev=296770&r1=296769&r2=296770&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/revisit.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/revisit.ll Thu Mar  2 11:38:46 2017
@@ -138,11 +138,11 @@ l0.0.latch:
 ; CHECK-CHILDREN: LoopUnrollPass on Loop at depth 2 containing: %l0.0<header>
 ; CHECK-CHILDREN-NOT: LoopUnrollPass
 ;
-; Revisit the children of the outer loop that are part of the prologue.
+; Revisit the children of the outer loop that are part of the epilogue.
 ; 
-; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0.prol<header>
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.0.epil<header>
 ; CHECK-NOT: LoopUnrollPass
-; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1.prol<header>
+; CHECK: LoopUnrollPass on Loop at depth 2 containing: %l0.0.1.epil<header>
 ; CHECK-NOT: LoopUnrollPass
 l0.latch:
   br label %l0

Modified: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop5.ll?rev=296770&r1=296769&r2=296770&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop5.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop5.ll Thu Mar  2 11:38:46 2017
@@ -14,9 +14,6 @@ entry:
   %cmp1 = icmp eq i3 %n, 0
   br i1 %cmp1, label %for.end, label %for.body
 
-; UNROLL-16-NOT: for.body.prol:
-; UNROLL-4: for.body.prol:
-
 for.body:                                         ; preds = %for.body, %entry
 ; UNROLL-16-LABEL: for.body:
 ; UNROLL-4-LABEL: for.body:
@@ -42,6 +39,10 @@ for.body:
 
 ; UNROLL-16-LABEL: for.end
 ; UNROLL-4-LABEL: for.end
+
+; UNROLL-16-NOT: for.body.epil:
+; UNROLL-4: for.body.epil:
+
 for.end:                                          ; preds = %for.body, %entry
   %sum.0.lcssa = phi i3 [ 0, %entry ], [ %add, %for.body ]
   ret i3 %sum.0.lcssa

Modified: llvm/trunk/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll?rev=296770&r1=296769&r2=296770&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll Thu Mar  2 11:38:46 2017
@@ -3,12 +3,12 @@
 @known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
 
 ; CHECK-LABEL: @bar_prof
-; CHECK: loop.prol:
 ; CHECK: loop:
 ; CHECK: %mul = mul
 ; CHECK: %mul.1 = mul
 ; CHECK: %mul.2 = mul
 ; CHECK: %mul.3 = mul
+; CHECK: loop.epil:
 define i32 @bar_prof(i32* noalias nocapture readonly %src, i64 %c) !prof !1 {
 entry:
   br label %loop
@@ -32,7 +32,7 @@ loop.end:
 }
 
 ; CHECK-LABEL: @bar_prof_flat
-; CHECK-NOT: loop.prol
+; CHECK-NOT: loop.epil
 define i32 @bar_prof_flat(i32* noalias nocapture readonly %src, i64 %c) !prof !1 {
 entry:
   br label %loop

Modified: llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll?rev=296770&r1=296769&r2=296770&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-pragmas.ll Thu Mar  2 11:38:46 2017
@@ -171,10 +171,6 @@ for.end:
 ; should be duplicated (original and 4x unrolled).
 ;
 ; CHECK-LABEL: @runtime_loop_with_count4(
-; CHECK: for.body.prol:
-; CHECK: store
-; CHECK-NOT: store
-; CHECK: br i1
 ; CHECK: for.body
 ; CHECK: store
 ; CHECK: store
@@ -182,6 +178,10 @@ for.end:
 ; CHECK: store
 ; CHECK-NOT: store
 ; CHECK: br i1
+; CHECK: for.body.epil:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
 define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
 entry:
   %cmp3 = icmp sgt i32 %b, 0
@@ -287,10 +287,6 @@ for.end:
 ; (original and 8x).
 ;
 ; CHECK-LABEL: @runtime_loop_with_enable(
-; CHECK: for.body.prol:
-; CHECK: store
-; CHECK-NOT: store
-; CHECK: br i1
 ; CHECK: for.body:
 ; CHECK: store i32
 ; CHECK: store i32
@@ -302,6 +298,10 @@ for.end:
 ; CHECK: store i32
 ; CHECK-NOT: store i32
 ; CHECK: br i1
+; CHECK: for.body.epil:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
 define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
 entry:
   %cmp3 = icmp sgt i32 %b, 0
@@ -328,16 +328,16 @@ for.end:
 ; should be duplicated (original and 3x unrolled).
 ;
 ; CHECK-LABEL: @runtime_loop_with_count3(
-; CHECK: for.body.prol:
-; CHECK: store
-; CHECK-NOT: store
-; CHECK: br i1
 ; CHECK: for.body
 ; CHECK: store
 ; CHECK: store
 ; CHECK: store
 ; CHECK-NOT: store
 ; CHECK: br i1
+; CHECK: for.body.epil:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
 define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
 entry:
   %cmp3 = icmp sgt i32 %b, 0




More information about the llvm-commits mailing list