[llvm] r310824 - [LoopUnroll] Enable option to peel remainder loop

Sam Parker via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 14 02:25:26 PDT 2017


Author: sam_parker
Date: Mon Aug 14 02:25:26 2017
New Revision: 310824

URL: http://llvm.org/viewvc/llvm-project?rev=310824&view=rev
Log:
[LoopUnroll] Enable option to peel remainder loop

On some targets, the penalty of executing the runtime unrolling checks
and then not executing the unrolled loop can be significantly
detrimental to performance. This results in the need to be more
conservative with the unroll count; keeping a trip count of 2 reduces
the overhead as well as increasing the chance of the unrolled body
being executed. But being conservative leaves performance gains on the
table.

This patch enables the unrolling of the remainder loop introduced by
runtime unrolling. This can help reduce the overhead of misunrolled
loops because the cost of non-taken branches is much less than the
cost of the backedge that would normally be executed in the remainder
loop. This allows larger unroll factors to be used without suffering
performance losses with smaller iteration counts.

Differential Revision: https://reviews.llvm.org/D36309

Added:
    llvm/trunk/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
Modified:
    llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
    llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h
    llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
    llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp
    llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp

Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=310824&r1=310823&r2=310824&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Mon Aug 14 02:25:26 2017
@@ -382,6 +382,8 @@ public:
     bool UpperBound;
     /// Allow peeling off loop iterations for loops with low dynamic tripcount.
     bool AllowPeeling;
+    /// Allow unrolling of all the iterations of the runtime loop remainder.
+    bool UnrollRemainder;
   };
 
   /// \brief Get target-customized preferences for the generic loop unrolling

Modified: llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h?rev=310824&r1=310823&r2=310824&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h Mon Aug 14 02:25:26 2017
@@ -42,14 +42,18 @@ const Loop* addClonedBlockToLoopInfo(Bas
 bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
                 bool AllowRuntime, bool AllowExpensiveTripCount,
                 bool PreserveCondBr, bool PreserveOnlyFirst,
-                unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI,
-                ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
-                OptimizationRemarkEmitter *ORE, bool PreserveLCSSA);
+                unsigned TripMultiple, unsigned PeelCount, bool UnrollRemainder,
+                LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
+                AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
+                bool PreserveLCSSA);
 
 bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                 bool AllowExpensiveTripCount,
-                                bool UseEpilogRemainder, LoopInfo *LI,
+                                bool UseEpilogRemainder, bool UnrollRemainder,
+                                LoopInfo *LI,
                                 ScalarEvolution *SE, DominatorTree *DT,
+                                AssumptionCache *AC,
+                                OptimizationRemarkEmitter *ORE,
                                 bool PreserveLCSSA);
 
 void computePeelCount(Loop *L, unsigned LoopSize,

Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=310824&r1=310823&r2=310824&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Mon Aug 14 02:25:26 2017
@@ -115,6 +115,10 @@ static cl::opt<bool>
                        cl::desc("Allows loops to be peeled when the dynamic "
                                 "trip count is known to be low."));
 
+static cl::opt<bool> UnrollUnrollRemainder(
+  "unroll-remainder", cl::Hidden,
+  cl::desc("Allow the loop remainder to be unrolled."));
+
 // This option isn't ever intended to be enabled, it serves to allow
 // experiments to check the assumptions about when this kind of revisit is
 // necessary.
@@ -153,6 +157,7 @@ static TargetTransformInfo::UnrollingPre
   UP.Partial = false;
   UP.Runtime = false;
   UP.AllowRemainder = true;
+  UP.UnrollRemainder = false;
   UP.AllowExpensiveTripCount = false;
   UP.Force = false;
   UP.UpperBound = false;
@@ -188,6 +193,8 @@ static TargetTransformInfo::UnrollingPre
     UP.UpperBound = false;
   if (UnrollAllowPeeling.getNumOccurrences() > 0)
     UP.AllowPeeling = UnrollAllowPeeling;
+  if (UnrollUnrollRemainder.getNumOccurrences() > 0)
+    UP.UnrollRemainder = UnrollUnrollRemainder;
 
   // Apply user values provided by argument
   if (UserThreshold.hasValue()) {
@@ -1034,7 +1041,8 @@ static bool tryToUnrollLoop(
   // Unroll the loop.
   if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
                   UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero,
-                  TripMultiple, UP.PeelCount, LI, &SE, &DT, &AC, &ORE,
+                  TripMultiple, UP.PeelCount, UP.UnrollRemainder,
+                  LI, &SE, &DT, &AC, &ORE,
                   PreserveLCSSA))
     return false;
 

Modified: llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp?rev=310824&r1=310823&r2=310824&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp Mon Aug 14 02:25:26 2017
@@ -295,7 +295,8 @@ static bool isEpilogProfitable(Loop *L)
 bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
                       bool AllowRuntime, bool AllowExpensiveTripCount,
                       bool PreserveCondBr, bool PreserveOnlyFirst,
-                      unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI,
+                      unsigned TripMultiple, unsigned PeelCount,
+                      bool UnrollRemainder, LoopInfo *LI,
                       ScalarEvolution *SE, DominatorTree *DT,
                       AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
                       bool PreserveLCSSA) {
@@ -418,7 +419,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned
 
   if (RuntimeTripCount && TripMultiple % Count != 0 &&
       !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
-                                  EpilogProfitability, LI, SE, DT,
+                                  EpilogProfitability, UnrollRemainder,
+                                  LI, SE, DT, AC, ORE,
                                   PreserveLCSSA)) {
     if (Force)
       RuntimeTripCount = false;

Modified: llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp?rev=310824&r1=310823&r2=310824&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp Mon Aug 14 02:25:26 2017
@@ -294,7 +294,8 @@ static void ConnectEpilog(Loop *L, Value
 /// Return the new cloned loop that is created when CreateRemainderLoop is true.
 static Loop *
 CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
-                const bool UseEpilogRemainder, BasicBlock *InsertTop,
+                const bool UseEpilogRemainder, const bool UnrollRemainder,
+                BasicBlock *InsertTop,
                 BasicBlock *InsertBot, BasicBlock *Preheader,
                 std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
                 ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
@@ -413,10 +414,13 @@ CloneLoopBlocks(Loop *L, Value *NewIter,
     }
 
     LLVMContext &Context = NewLoop->getHeader()->getContext();
-    SmallVector<Metadata *, 1> DisableOperands;
-    DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
-    MDNode *DisableNode = MDNode::get(Context, DisableOperands);
-    MDs.push_back(DisableNode);
+    if (!UnrollRemainder) {
+      SmallVector<Metadata *, 1> DisableOperands;
+      DisableOperands.push_back(MDString::get(Context,
+                                              "llvm.loop.unroll.disable"));
+      MDNode *DisableNode = MDNode::get(Context, DisableOperands);
+      MDs.push_back(DisableNode);
+    }
 
     MDNode *NewLoopID = MDNode::get(Context, MDs);
     // Set operand 0 to refer to the loop id itself.
@@ -525,8 +529,11 @@ static bool canProfitablyUnrollMultiExit
 bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                       bool AllowExpensiveTripCount,
                                       bool UseEpilogRemainder,
+                                      bool UnrollRemainder,
                                       LoopInfo *LI, ScalarEvolution *SE,
-                                      DominatorTree *DT, bool PreserveLCSSA) {
+                                      DominatorTree *DT, AssumptionCache *AC,
+                                      OptimizationRemarkEmitter *ORE,
+                                      bool PreserveLCSSA) {
   DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
   DEBUG(L->dump());
 
@@ -739,7 +746,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Lo
   BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
   BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
   Loop *remainderLoop = CloneLoopBlocks(
-      L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot,
+      L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
+      InsertTop, InsertBot,
       NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
 
   // Insert the cloned blocks into the function.
@@ -883,6 +891,15 @@ bool llvm::UnrollRuntimeLoopRemainder(Lo
       formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
   }
 
+  if (remainderLoop && UnrollRemainder) {
+    UnrollLoop(remainderLoop, /*Count*/Count - 1, /*TripCount*/Count - 1,
+               /*Force*/false, /*AllowRuntime*/false,
+               /*AllowExpensiveTripCount*/false, /*PreserveCondBr*/true,
+               /*PreserveOnlyFirst*/false, /*TripMultiple*/1,
+               /*PeelCount*/0, /*UnrollRemainder*/false, LI, SE, DT, AC, ORE,
+               PreserveLCSSA);
+  }
+
   NumRuntimeUnrolled++;
   return true;
 }

Added: llvm/trunk/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll?rev=310824&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-unroll-remainder.ll Mon Aug 14 02:25:26 2017
@@ -0,0 +1,74 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-count=4 -unroll-remainder -instcombine | FileCheck %s
+
+; CHECK-LABEL: unroll
+define i32 @unroll(i32* nocapture readonly %a, i32* nocapture readonly %b, i32 %N) local_unnamed_addr #0 {
+entry:
+  %cmp9 = icmp eq i32 %N, 0
+  br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph
+
+for.body.lr.ph:
+  %wide.trip.count = zext i32 %N to i64
+  br label %for.body
+
+for.cond.cleanup:
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  ret i32 %c.0.lcssa
+
+; CHECK-LABEL: for.body.lr.ph
+; CHECK: [[COUNT:%[a-z.0-9]+]] = add nsw i64 %wide.trip.count, -1
+; CHECK: %xtraiter = and i64 %wide.trip.count, 3
+; CHECK: [[CMP:%[a-z.0-9]+]] = icmp ult i64 [[COUNT]], 3
+; CHECK: br i1 [[CMP]], label %[[CLEANUP:.*]], label %for.body.lr.ph.new
+
+; CHECK-LABEL: for.body.lr.ph.new:
+; CHECK: %unroll_iter = sub nsw i64 %wide.trip.count, %xtraiter
+; CHECK: br label %for.body
+
+; CHECK: [[CLEANUP]]:
+; CHECK: [[MOD:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 0
+; CHECK: br i1 [[MOD]], label %[[EXIT:.*]], label %[[EPIL_PEEL0_PRE:.*]]
+
+; CHECK: [[EPIL_PEEL0_PRE]]:
+; CHECK: br label %[[EPIL_PEEL0:.*]]
+
+; CHECK: [[EPIL_PEEL0]]:
+; CHECK: [[PEEL_CMP0:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 1
+; CHECK: br i1 [[PEEL_CMP0]], label %[[EPIL_EXIT:.*]], label %[[EPIL_PEEL1:.*]],
+
+; CHECK: [[EPIL_EXIT]]:
+; CHECK: br label %[[EXIT]]
+
+; CHECK: [[EXIT]]:
+; CHECK: ret i32
+
+; CHECK-LABEL: for.body:
+; CHECK: [[INDVAR0:%[a-z.0-9]+]] = phi i64 [ 0, %for.body.lr.ph
+; CHECK: [[ITER:%[a-z.0-9]+]] = phi i64 [ %unroll_iter
+; CHECK: or i64 [[INDVAR0]], 1
+; CHECK: or i64 [[INDVAR0]], 2
+; CHECK: or i64 [[INDVAR0]], 3
+; CHECK: add nsw i64 [[INDVAR0]], 4
+; CHECK: [[SUB:%[a-z.0-9]+]] = add i64 [[ITER]], -4
+; CHECK: [[ITER_CMP:%[a-z.0-9]+]] = icmp eq i64 [[SUB]], 0
+; CHECK: br i1 [[ITER_CMP]], label %[[LOOP_EXIT:.*]], label %for.body
+
+; CHECK: [[EPIL_PEEL1]]:
+; CHECK: [[PEEL_CMP1:%[a-z.0-9]+]] = icmp eq i64 %xtraiter, 2
+; CHECK: br i1 [[PEEL_CMP1]], label %[[EPIL_EXIT]], label %[[EPIL_PEEL2:.*]],
+
+; CHECK: [[EPIL_PEEL2]]:
+; CHECK: br label %[[EXIT]]
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %c.010 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  %1 = load i32, i32* %arrayidx2, align 4
+  %mul = mul nsw i32 %1, %0
+  %add = add nsw i32 %mul, %c.010
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}




More information about the llvm-commits mailing list