[llvm] r306484 - [NewPM/Inliner] Reduce threshold for cold callsites in the non-PGO case
Easwaran Raman via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 27 16:11:18 PDT 2017
Author: eraman
Date: Tue Jun 27 16:11:18 2017
New Revision: 306484
URL: http://llvm.org/viewvc/llvm-project?rev=306484&view=rev
Log:
[NewPM/Inliner] Reduce threshold for cold callsites in the non-PGO case
Differential Revision: https://reviews.llvm.org/D34312
Added:
llvm/trunk/test/Transforms/Inline/inline-cold-callsite-pgo.ll
- copied, changed from r306478, llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll
Modified:
llvm/trunk/lib/Analysis/InlineCost.cpp
llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll
Modified: llvm/trunk/lib/Analysis/InlineCost.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=306484&r1=306483&r2=306484&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/InlineCost.cpp (original)
+++ llvm/trunk/lib/Analysis/InlineCost.cpp Tue Jun 27 16:11:18 2017
@@ -66,6 +66,12 @@ static cl::opt<int>
cl::ZeroOrMore,
cl::desc("Threshold for hot callsites "));
+// Cutoff used to classify a callsite as cold when no profile summary exists:
+// the callsite's block frequency as a percentage of the caller's entry
+// frequency. Defaults to 2 (percent).
+static cl::opt<int> ColdCallSiteRelFreq(
+ "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
+ cl::desc("Maximum block frequency, expressed as a percentage of caller's "
+ "entry frequency, for a callsite to be cold in the absence of "
+ "profile information."));
+
namespace {
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@@ -172,6 +178,9 @@ class CallAnalyzer : public InstVisitor<
/// Return true if size growth is allowed when inlining the callee at CS.
bool allowSizeGrowth(CallSite CS);
+ /// Return true if \p CS is a cold callsite.
+ bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
+
// Custom analysis routines.
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
@@ -631,6 +640,26 @@ bool CallAnalyzer::allowSizeGrowth(CallS
return true;
}
+bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
+ // If global profile summary is available, then callsite's coldness is
+ // determined based on that.
+ if (PSI->hasProfileSummary()) // NOTE(review): PSI is dereferenced unguarded here; confirm it is never null.
+ return PSI->isColdCallSite(CS, CallerBFI);
+ if (!CallerBFI) // No block frequencies for the caller: never report the callsite as cold.
+ return false;
+
+ // In the absence of global profile summary, determine if the callsite is cold
+ // relative to caller's entry. We could potentially cache the computation of
+ // scaled entry frequency, but the added complexity is not worth it unless
+ // this scaling shows up high in the profiles.
+ const BranchProbability ColdProb(ColdCallSiteRelFreq, 100); // Option value is a percentage, hence "out of 100".
+ auto CallSiteBB = CS.getInstruction()->getParent(); // Basic block that contains the call instruction.
+ auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
+ auto CallerEntryFreq =
+ CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
+ return CallSiteFreq < CallerEntryFreq * ColdProb; // Cold only when strictly below the scaled entry frequency.
+}
+
void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// If no size growth is allowed for this inlining, set Threshold to 0.
if (!allowSizeGrowth(CS)) {
@@ -676,7 +705,7 @@ void CallAnalyzer::updateThreshold(CallS
if (PSI->isHotCallSite(CS, CallerBFI)) {
DEBUG(dbgs() << "Hot callsite.\n");
Threshold = Params.HotCallSiteThreshold.getValue();
- } else if (PSI->isColdCallSite(CS, CallerBFI)) {
+ } else if (isColdCallSite(CS, CallerBFI)) {
DEBUG(dbgs() << "Cold callsite.\n");
Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
}
Copied: llvm/trunk/test/Transforms/Inline/inline-cold-callsite-pgo.ll (from r306478, llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/inline-cold-callsite-pgo.ll?p2=llvm/trunk/test/Transforms/Inline/inline-cold-callsite-pgo.ll&p1=llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll&r1=306478&r2=306484&rev=306484&view=diff
==============================================================================
(empty)
Modified: llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll?rev=306484&r1=306483&r2=306484&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll (original)
+++ llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll Tue Jun 27 16:11:18 2017
@@ -1,54 +1,47 @@
+
; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s
; This tests that a cold callsite gets the inline-cold-callsite-threshold
; and does not get inlined. Another callsite to an identical callee that
; is not cold gets inlined because cost is below the inline-threshold.
-define i32 @callee1(i32 %x) !prof !21 {
- %x1 = add i32 %x, 1
- %x2 = add i32 %x1, 1
- %x3 = add i32 %x2, 1
+define void @callee() {
+ call void @extern()
call void @extern()
- ret i32 %x3
+ ret void
}
-define i32 @caller(i32 %n) !prof !22 {
-; CHECK-LABEL: @caller(
- %cond = icmp sle i32 %n, 100
- br i1 %cond, label %cond_true, label %cond_false, !prof !0
-
-cond_true:
-; CHECK-LABEL: cond_true:
-; CHECK-NOT: call i32 @callee1
-; CHECK: ret i32 %x3.i
- %i = call i32 @callee1(i32 %n)
- ret i32 %i
-cond_false:
-; CHECK-LABEL: cond_false:
-; CHECK: call i32 @callee1
-; CHECK: ret i32 %j
- %j = call i32 @callee1(i32 %n)
- ret i32 %j
-}
declare void @extern()
+declare i1 @ext(i32)
+
+; CHECK-LABEL: caller
+define i32 @caller(i32 %n) { ; loop with one call to @callee in the loop body and one in a rarely-taken block
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret i32 0
+
+for.body:
+ %i.05 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+; CHECK: %call = tail call
+ %call = tail call zeroext i1 @ext(i32 %i.05)
+; CHECK-NOT: call void @callee
+; CHECK-NEXT: call void @extern
+ call void @callee() ; not in the cold block; the directives above expect @callee's body (call to @extern) here instead of this call
+ br i1 %call, label %cold, label %for.inc, !prof !0 ; !0 weights %cold at 1 against 2000 for %for.inc
+
+cold:
+; CHECK: call void @callee
+ call void @callee() ; the directive above expects this call to remain (not inlined)
+ br label %for.inc
+
+for.inc:
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
-!0 = !{!"branch_weights", i32 200, i32 1}
-!llvm.module.flags = !{!1}
-!21 = !{!"function_entry_count", i64 200}
-!22 = !{!"function_entry_count", i64 200}
-
-!1 = !{i32 1, !"ProfileSummary", !2}
-!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
-!3 = !{!"ProfileFormat", !"InstrProf"}
-!4 = !{!"TotalCount", i64 10000}
-!5 = !{!"MaxCount", i64 1000}
-!6 = !{!"MaxInternalCount", i64 1}
-!7 = !{!"MaxFunctionCount", i64 1000}
-!8 = !{!"NumCounts", i64 3}
-!9 = !{!"NumFunctions", i64 3}
-!10 = !{!"DetailedSummary", !11}
-!11 = !{!12, !13, !14}
-!12 = !{i32 10000, i64 1000, i32 1}
-!13 = !{i32 999000, i64 1000, i32 1}
-!14 = !{i32 999999, i64 1, i32 2}
+!0 = !{!"branch_weights", i32 1, i32 2000}
More information about the llvm-commits
mailing list