[llvm] r287186 - Use profile info to adjust loop unroll threshold.
Dehao Chen via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 16 17:17:03 PST 2016
Author: dehao
Date: Wed Nov 16 19:17:02 2016
New Revision: 287186
URL: http://llvm.org/viewvc/llvm-project?rev=287186&view=rev
Log:
Use profile info to adjust loop unroll threshold.
Summary:
For a flat loop, even if it is hot, it is not a good idea to unroll at runtime, so we set a lower partial unroll threshold.
For a hot loop, we set a higher unroll threshold and allow expensive trip count computation to enable more aggressive unrolling.
Reviewers: davidxl, mzolotukhin
Subscribers: sanjoy, mehdi_amini, llvm-commits
Differential Revision: https://reviews.llvm.org/D26527
Added:
llvm/trunk/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
Modified:
llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp
Modified: llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h?rev=287186&r1=287185&r2=287186&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/LoopUtils.h Wed Nov 16 19:17:02 2016
@@ -461,6 +461,11 @@ Optional<const MDOperand *> findStringMe
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
unsigned V = 0);
+/// \brief Get a loop's estimated trip count based on branch weight metadata.
+/// Returns 0 when the count is estimated to be 0, or None when a meaningful
+/// estimate cannot be made.
+Optional<unsigned> getLoopEstimatedTripCount(Loop *L);
+
/// Helper to consistently add the set of standard passes to a loop pass's \c
/// AnalysisUsage.
///
Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp?rev=287186&r1=287185&r2=287186&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp Wed Nov 16 19:17:02 2016
@@ -102,6 +102,12 @@ static cl::opt<unsigned> PragmaUnrollThr
cl::desc("Unrolled size limit for loops with an unroll(full) or "
"unroll_count pragma."));
+static cl::opt<unsigned> FlatLoopTripCountThreshold(
+ "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
+ cl::desc("If the runtime tripcount for the loop is lower than the "
+ "threshold, the loop is considered as flat and will be less "
+ "aggressively unrolled."));
+
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
@@ -748,6 +754,16 @@ static bool computeUnrollCount(
bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
PragmaEnableUnroll || UserUnrollCount;
+ // Check if the runtime trip count is too small when profile is available.
+ if (L->getHeader()->getParent()->getEntryCount() && TripCount == 0) {
+ if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
+ if (*ProfileTripCount < FlatLoopTripCountThreshold)
+ return false;
+ else
+ UP.AllowExpensiveTripCount = true;
+ }
+ }
+
if (ExplicitUnroll && TripCount != 0) {
// If the loop has an unrolling pragma, we want to be more aggressive with
// unrolling limits. Set thresholds to at least the PragmaThreshold value
Modified: llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp?rev=287186&r1=287185&r2=287186&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUtils.cpp Wed Nov 16 19:17:02 2016
@@ -1067,3 +1067,39 @@ bool llvm::isGuaranteedToExecute(const I
// just a special case of this.)
return true;
}
+
+Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
+ // Require a unique exiting block. NOTE(review): a latch is assumed below but never null-checked here.
+ if (!L->getExitingBlock())
+ return None;
+
+ // Get the latch terminator; only a two-successor branch can carry the weights we need.
+ BranchInst *LatchBR =
+ dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
+ if (!LatchBR || LatchBR->getNumSuccessors() != 2)
+ return None;
+
+ assert((LatchBR->getSuccessor(0) == L->getHeader() ||
+ LatchBR->getSuccessor(1) == L->getHeader()) &&
+ "At least one edge out of the latch must go to the header");
+
+ // To estimate the number of times the loop body was executed, we want to
+ // know the number of times the backedge was taken, vs. the number of times
+ // we exited the loop.
+ // The branch weights give us almost what we want, since they were adjusted
+ // from the raw counts to provide a better probability estimate. Remove the
+ // adjustment by subtracting 1 from both weights; a weight <= 1 yields None.
+ uint64_t TrueVal, FalseVal;
+ if (!LatchBR->extractProfMetadata(TrueVal, FalseVal) || (TrueVal <= 1) ||
+ (FalseVal <= 1))
+ return None;
+
+ TrueVal -= 1;
+ FalseVal -= 1;
+
+ // Integer-divide the header-edge weight by the other edge's weight (taken to be the exit). NOTE(review): the uint64_t quotient narrows to unsigned.
+ if (LatchBR->getSuccessor(0) == L->getHeader())
+ return TrueVal / FalseVal;
+ else
+ return FalseVal / TrueVal;
+}
Added: llvm/trunk/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll?rev=287186&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll Wed Nov 16 19:17:02 2016
@@ -0,0 +1,59 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-threshold=40 -unroll-dynamic-cost-savings-discount=0 | FileCheck %s
+
+@known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
+
+; CHECK-LABEL: @bar_prof
+; CHECK: loop.prol:
+; CHECK: loop:
+; CHECK: %mul = mul
+; CHECK: %mul.1 = mul
+; CHECK: %mul.2 = mul
+; CHECK: %mul.3 = mul
+define i32 @bar_prof(i32* noalias nocapture readonly %src, i64 %c) !prof !1 {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %r = phi i32 [ 0, %entry ], [ %add, %loop ]
+ %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+ %src_element = load i32, i32* %arrayidx, align 4
+ %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
+ %const_array_element = load i32, i32* %array_const_idx, align 4
+ %mul = mul nsw i32 %src_element, %const_array_element
+ %add = add nsw i32 %mul, %r
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond86.i = icmp eq i64 %inc, %c
+ br i1 %exitcond86.i, label %loop.end, label %loop, !prof !2
+
+loop.end:
+ %r.lcssa = phi i32 [ %r, %loop ]
+ ret i32 %r.lcssa
+}
+
+; CHECK-LABEL: @bar_prof_flat
+; CHECK-NOT: loop.prol
+define i32 @bar_prof_flat(i32* noalias nocapture readonly %src, i64 %c) !prof !1 {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
+ %r = phi i32 [ 0, %entry ], [ %add, %loop ]
+ %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+ %src_element = load i32, i32* %arrayidx, align 4
+ %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
+ %const_array_element = load i32, i32* %array_const_idx, align 4
+ %mul = mul nsw i32 %src_element, %const_array_element
+ %add = add nsw i32 %mul, %r
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond86.i = icmp eq i64 %inc, %c
+ br i1 %exitcond86.i, label %loop, label %loop.end, !prof !2
+
+loop.end:
+ %r.lcssa = phi i32 [ %r, %loop ]
+ ret i32 %r.lcssa
+}
+
+!1 = !{!"function_entry_count", i64 1}
+!2 = !{!"branch_weights", i32 1, i32 1000}
More information about the llvm-commits
mailing list