[llvm] r306484 - [NewPM/Inliner] Reduce threshold for cold callsites in the non-PGO case

Easwaran Raman via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 27 16:11:18 PDT 2017


Author: eraman
Date: Tue Jun 27 16:11:18 2017
New Revision: 306484

URL: http://llvm.org/viewvc/llvm-project?rev=306484&view=rev
Log:
[NewPM/Inliner] Reduce threshold for cold callsites in the non-PGO case

Differential Revision: https://reviews.llvm.org/D34312

Added:
    llvm/trunk/test/Transforms/Inline/inline-cold-callsite-pgo.ll
      - copied, changed from r306478, llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll
Modified:
    llvm/trunk/lib/Analysis/InlineCost.cpp
    llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll

Modified: llvm/trunk/lib/Analysis/InlineCost.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=306484&r1=306483&r2=306484&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/InlineCost.cpp (original)
+++ llvm/trunk/lib/Analysis/InlineCost.cpp Tue Jun 27 16:11:18 2017
@@ -66,6 +66,12 @@ static cl::opt<int>
                          cl::ZeroOrMore,
                          cl::desc("Threshold for hot callsites "));
 
+static cl::opt<int> ColdCallSiteRelFreq(
+    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
+    cl::desc("Maxmimum block frequency, expressed as a percentage of caller's "
+             "entry frequency, for a callsite to be cold in the absence of "
+             "profile information."));
+
 namespace {
 
 class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@@ -172,6 +178,9 @@ class CallAnalyzer : public InstVisitor<
   /// Return true if size growth is allowed when inlining the callee at CS.
   bool allowSizeGrowth(CallSite CS);
 
+  /// Return true if \p CS is a cold callsite.
+  bool isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI);
+
   // Custom analysis routines.
   bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
 
@@ -631,6 +640,26 @@ bool CallAnalyzer::allowSizeGrowth(CallS
   return true;
 }
 
+bool CallAnalyzer::isColdCallSite(CallSite CS, BlockFrequencyInfo *CallerBFI) {
+  // If global profile summary is available, then callsite's coldness is
+  // determined based on that.
+  if (PSI->hasProfileSummary())
+    return PSI->isColdCallSite(CS, CallerBFI);
+  if (!CallerBFI)
+    return false;
+
+  // In the absence of global profile summary, determine if the callsite is cold
+  // relative to caller's entry. We could potentially cache the computation of
+  // scaled entry frequency, but the added complexity is not worth it unless
+  // this scaling shows up high in the profiles.
+  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
+  auto CallSiteBB = CS.getInstruction()->getParent();
+  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
+  auto CallerEntryFreq =
+      CallerBFI->getBlockFreq(&(CS.getCaller()->getEntryBlock()));
+  return CallSiteFreq < CallerEntryFreq * ColdProb;
+}
+
 void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
   // If no size growth is allowed for this inlining, set Threshold to 0.
   if (!allowSizeGrowth(CS)) {
@@ -676,7 +705,7 @@ void CallAnalyzer::updateThreshold(CallS
         if (PSI->isHotCallSite(CS, CallerBFI)) {
           DEBUG(dbgs() << "Hot callsite.\n");
           Threshold = Params.HotCallSiteThreshold.getValue();
-        } else if (PSI->isColdCallSite(CS, CallerBFI)) {
+        } else if (isColdCallSite(CS, CallerBFI)) {
           DEBUG(dbgs() << "Cold callsite.\n");
           Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
         }

Copied: llvm/trunk/test/Transforms/Inline/inline-cold-callsite-pgo.ll (from r306478, llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/inline-cold-callsite-pgo.ll?p2=llvm/trunk/test/Transforms/Inline/inline-cold-callsite-pgo.ll&p1=llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll&r1=306478&r2=306484&rev=306484&view=diff
==============================================================================
    (empty)

Modified: llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll?rev=306484&r1=306483&r2=306484&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll (original)
+++ llvm/trunk/test/Transforms/Inline/inline-cold-callsite.ll Tue Jun 27 16:11:18 2017
@@ -1,54 +1,47 @@
+
 ; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s
 
 ; This tests that a cold callsite gets the inline-cold-callsite-threshold
 ; and does not get inlined. Another callsite to an identical callee that
 ; is not cold gets inlined because cost is below the inline-threshold.
 
-define i32 @callee1(i32 %x) !prof !21 {
-  %x1 = add i32 %x, 1
-  %x2 = add i32 %x1, 1
-  %x3 = add i32 %x2, 1
+define void @callee() {
+  call void @extern()
   call void @extern()
-  ret i32 %x3
+  ret void
 }
 
-define i32 @caller(i32 %n) !prof !22 {
-; CHECK-LABEL: @caller(
-  %cond = icmp sle i32 %n, 100
-  br i1 %cond, label %cond_true, label %cond_false, !prof !0
-
-cond_true:
-; CHECK-LABEL: cond_true:
-; CHECK-NOT: call i32 @callee1
-; CHECK: ret i32 %x3.i
-  %i = call i32 @callee1(i32 %n)
-  ret i32 %i
-cond_false:
-; CHECK-LABEL: cond_false:
-; CHECK: call i32 @callee1
-; CHECK: ret i32 %j
-  %j = call i32 @callee1(i32 %n)
-  ret i32 %j
-}
 declare void @extern()
+declare i1 @ext(i32)
+
+; CHECK-LABEL: caller
+define i32 @caller(i32 %n) {
+entry:
+  %cmp4 = icmp sgt i32 %n, 0
+  br i1 %cmp4, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret i32 0
+
+for.body:
+  %i.05 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+; CHECK: %call = tail call
+  %call = tail call zeroext i1 @ext(i32 %i.05)
+; CHECK-NOT: call void @callee
+; CHECK-NEXT: call void @extern
+  call void @callee()
+  br i1 %call, label %cold, label %for.inc, !prof !0
+
+cold:
+; CHECK: call void @callee
+  call void @callee()
+  br label %for.inc
+
+for.inc:
+  %inc = add nuw nsw i32 %i.05, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
 
-!0 = !{!"branch_weights", i32 200, i32 1}
 
-!llvm.module.flags = !{!1}
-!21 = !{!"function_entry_count", i64 200}
-!22 = !{!"function_entry_count", i64 200}
-
-!1 = !{i32 1, !"ProfileSummary", !2}
-!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
-!3 = !{!"ProfileFormat", !"InstrProf"}
-!4 = !{!"TotalCount", i64 10000}
-!5 = !{!"MaxCount", i64 1000}
-!6 = !{!"MaxInternalCount", i64 1}
-!7 = !{!"MaxFunctionCount", i64 1000}
-!8 = !{!"NumCounts", i64 3}
-!9 = !{!"NumFunctions", i64 3}
-!10 = !{!"DetailedSummary", !11}
-!11 = !{!12, !13, !14}
-!12 = !{i32 10000, i64 1000, i32 1}
-!13 = !{i32 999000, i64 1000, i32 1}
-!14 = !{i32 999999, i64 1, i32 2}
+!0 = !{!"branch_weights", i32 1, i32 2000}




More information about the llvm-commits mailing list