[llvm] r309964 - Do not want to use BFI to get profile count for sample pgo

Dehao Chen via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 3 10:11:41 PDT 2017


Author: dehao
Date: Thu Aug  3 10:11:41 2017
New Revision: 309964

URL: http://llvm.org/viewvc/llvm-project?rev=309964&view=rev
Log:
Do not want to use BFI to get profile count for sample pgo

Summary: For SamplePGO, we already record the callsite count in the call instruction itself. So we do not want to use BFI to get profile count as it is less accurate.

Reviewers: tejohnson, davidxl, eraman

Reviewed By: eraman

Subscribers: sanjoy, llvm-commits, mehdi_amini

Differential Revision: https://reviews.llvm.org/D36025

Added:
    llvm/trunk/test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll
Modified:
    llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp
    llvm/trunk/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
    llvm/trunk/unittests/Analysis/ProfileSummaryInfoTest.cpp

Modified: llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp?rev=309964&r1=309963&r2=309964&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp Thu Aug  3 10:11:41 2017
@@ -39,6 +39,12 @@ static cl::opt<int> ProfileSummaryCutoff
     cl::desc("A count is cold if it is below the minimum count"
              " to reach this percentile of total counts."));
 
+static cl::opt<bool> AccurateSampleProfile(
+    "accurate-sample-profile", cl::Hidden, cl::init(false),
+    cl::desc("If the sample profile is accurate, we will mark all un-sampled "
+             "callsite as cold. Otherwise, treat un-sampled callsites as if "
+             "we have no profile."));
+
 // Find the minimum count to reach a desired percentile of counts.
 static uint64_t getMinCountForPercentile(SummaryEntryVector &DS,
                                          uint64_t Percentile) {
@@ -78,10 +84,12 @@ ProfileSummaryInfo::getProfileCount(cons
   if (hasSampleProfile()) {
     // In sample PGO mode, check if there is a profile metadata on the
     // instruction. If it is present, determine hotness solely based on that,
-    // since the sampled entry count may not be accurate.
+    // since the sampled entry count may not be accurate. If there is no
+    // annotation on the instruction, return None.
     uint64_t TotalCount;
     if (Inst->extractProfTotalWeight(TotalCount))
       return TotalCount;
+    return None;
   }
   if (BFI)
     return BFI->getBlockProfileCount(Inst->getParent());
@@ -199,7 +207,15 @@ bool ProfileSummaryInfo::isHotCallSite(c
 bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,
                                         BlockFrequencyInfo *BFI) {
   auto C = getProfileCount(CS.getInstruction(), BFI);
-  return C && isColdCount(*C);
+  if (C)
+    return isColdCount(*C);
+
+  // In SamplePGO, if the caller has been sampled, and there is no profile
+  // annotated on the callsite, we consider the callsite as cold.
+  // If there is no profile for the caller, and we know the profile is
+  // accurate, we consider the callsite as cold.
+  return (hasSampleProfile() &&
+          (CS.getCaller()->getEntryCount() || AccurateSampleProfile));
 }
 
 INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",

Added: llvm/trunk/test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll?rev=309964&view=auto
==============================================================================
--- llvm/trunk/test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll (added)
+++ llvm/trunk/test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll Thu Aug  3 10:11:41 2017
@@ -0,0 +1,31 @@
+; ModuleID = 'thinlto-function-summary-callgraph-profile-summary2.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @hot1() #1 {
+  ret void
+}
+define void @hot2() #1 {
+  ret void
+}
+define void @hot3() #1 {
+  ret void
+}
+define void @cold1() #1 {
+  ret void
+}
+define void @cold2() #1 {
+  ret void
+}
+define void @cold3() #1 {
+  ret void
+}
+define void @none1() #1 {
+  ret void
+}
+define void @none2() #1 {
+  ret void
+}
+define void @none3() #1 {
+  ret void
+}

Modified: llvm/trunk/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll?rev=309964&r1=309963&r2=309964&view=diff
==============================================================================
--- llvm/trunk/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll (original)
+++ llvm/trunk/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll Thu Aug  3 10:11:41 2017
@@ -1,7 +1,7 @@
 ; Test to check the callgraph in summary when there is PGO
 ; RUN: opt -module-summary %s -o %t.o
 ; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
-; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph-profile-summary.ll -o %t2.o
+; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll -o %t2.o
 ; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o
 ; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED
 
@@ -16,24 +16,26 @@
 ; "hot3"
 ; CHECK-NEXT: <FUNCTION op0=20 op1=4
 ; "hot4"
-; CHECK-NEXT: <FUNCTION op0=24 op1=4
+; CHECK-NEXT: <FUNCTION op0=24 op1=5
 ; "cold"
-; CHECK-NEXT: <FUNCTION op0=28 op1=4
+; CHECK-NEXT: <FUNCTION op0=29 op1=5
 ; "none1"
-; CHECK-NEXT: <FUNCTION op0=32 op1=5
+; CHECK-NEXT: <FUNCTION op0=34 op1=5
 ; "none2"
-; CHECK-NEXT: <FUNCTION op0=37 op1=5
+; CHECK-NEXT: <FUNCTION op0=39 op1=5
 ; "none3"
-; CHECK-NEXT: <FUNCTION op0=42 op1=5
+; CHECK-NEXT: <FUNCTION op0=44 op1=5
+; CHECK-NEXT: <FUNCTION op0=49 op1=5
+
 ; CHECK-LABEL:       <GLOBALVAL_SUMMARY_BLOCK
 ; CHECK-NEXT:    <VERSION
-; CHECK-NEXT:    <VALUE_GUID op0=25 op1=123/>
-; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123
-; CHECK-NEXT:    <PERMODULE_PROFILE {{.*}} op4=1 op5=3 op6=5 op7=1 op8=2 op9=3 op10=4 op11=3 op12=6 op13=2 op14=3 op15=3 op16=7 op17=2 op18=8 op19=2 op20=25 op21=4/>
+; CHECK-NEXT:    <VALUE_GUID op0=26 op1=123/>
+; op4=none1 op6=hot1 op8=cold1 op10=none2 op12=hot2 op14=cold2 op16=none3 op18=hot3 op20=cold3 op22=123
+; CHECK-NEXT:    <PERMODULE_PROFILE {{.*}} op4=7 op5=0 op6=1 op7=3 op8=4 op9=1 op10=8 op11=0 op12=2 op13=3 op14=5 op15=1 op16=9 op17=0 op18=3 op19=3 op20=6 op21=1 op22=26 op23=4/>
 ; CHECK-NEXT:  </GLOBALVAL_SUMMARY_BLOCK>
 
 ; CHECK: <STRTAB_BLOCK
-; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3hot4coldnone1none2none3{{.*}}'
+; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3cold1cold2cold3none1none2none3{{.*}}'
 
 ; COMBINED:       <GLOBALVAL_SUMMARY_BLOCK
 ; COMBINED-NEXT:    <VERSION
@@ -45,13 +47,17 @@
 ; COMBINED-NEXT:    <VALUE_GUID
 ; COMBINED-NEXT:    <VALUE_GUID
 ; COMBINED-NEXT:    <VALUE_GUID
+; COMBINED-NEXT:    <VALUE_GUID
+; COMBINED-NEXT:    <VALUE_GUID
+; COMBINED-NEXT:    <COMBINED abbrevid=
 ; COMBINED-NEXT:    <COMBINED abbrevid=
 ; COMBINED-NEXT:    <COMBINED abbrevid=
 ; COMBINED-NEXT:    <COMBINED abbrevid=
 ; COMBINED-NEXT:    <COMBINED abbrevid=
 ; COMBINED-NEXT:    <COMBINED abbrevid=
 ; COMBINED-NEXT:    <COMBINED abbrevid=
-; COMBINED-NEXT:    <COMBINED_PROFILE {{.*}} op5=[[HOT1:.*]] op6=3 op7=[[COLD:.*]] op8=1 op9=[[HOT2:.*]] op10=3 op11=[[NONE1:.*]] op12=2 op13=[[HOT3:.*]] op14=3 op15=[[NONE2:.*]] op16=2 op17=[[NONE3:.*]] op18=2/>
+; COMBINED-NEXT:    <COMBINED abbrevid=
+; COMBINED-NEXT:    <COMBINED_PROFILE {{.*}} op5=[[NONE1:.*]] op6=0 op7=[[HOT1:.*]] op8=3 op9=[[COLD1:.*]] op10=1 op11=[[NONE2:.*]] op12=0 op13=[[HOT2:.*]] op14=3 op15=[[COLD2:.*]] op16=1 op17=[[NONE3:.*]] op18=0 op19=[[HOT3:.*]] op20=3 op21=[[COLD3:.*]] op22=1/>
 ; COMBINED_NEXT:    <COMBINED abbrevid=
 ; COMBINED_NEXT:  </GLOBALVAL_SUMMARY_BLOCK>
 
@@ -63,24 +69,19 @@ target triple = "x86_64-unknown-linux-gn
 ; This function have high profile count, so entry block is hot.
 define void @hot_function(i1 %a, i1 %a2) !prof !20 {
 entry:
-    call void @hot1()
-    br i1 %a, label %Cold, label %Hot, !prof !41
-Cold:           ; 1/1000 goes here
-  call void @cold()
-  call void @hot2()
-  call void @hot4(), !prof !15
-  call void @none1()
-  br label %exit
-Hot:            ; 999/1000 goes here
-  call void @hot2()
-  call void @hot3()
-  br i1 %a2, label %None1, label %None2, !prof !42
-None1:          ; half goes here
   call void @none1()
+  call void @hot1(), !prof !15
+  call void @cold1(), !prof !16
+  br i1 %a, label %Cold, label %Hot, !prof !41
+Cold:           ; 1/1000 goes here
   call void @none2()
+  call void @hot2(), !prof !15
+  call void @cold2(), !prof !16
   br label %exit
-None2:          ; half goes here
+Hot:            ; 999/1000 goes here
   call void @none3()
+  call void @hot3(), !prof !15
+  call void @cold3(), !prof !16
   br label %exit
 exit:
   ret void
@@ -89,17 +90,14 @@ exit:
 declare void @hot1() #1
 declare void @hot2() #1
 declare void @hot3() #1
-declare void @hot4() #1
-declare void @cold() #1
+declare void @cold1() #1
+declare void @cold2() #1
+declare void @cold3() #1
 declare void @none1() #1
 declare void @none2() #1
 declare void @none3() #1
 
-
 !41 = !{!"branch_weights", i32 1, i32 1000}
-!42 = !{!"branch_weights", i32 1, i32 1}
-
-
 
 !llvm.module.flags = !{!1}
 !20 = !{!"function_entry_count", i64 110, i64 123}
@@ -119,3 +117,4 @@ declare void @none3() #1
 !13 = !{i32 999000, i64 100, i32 1}
 !14 = !{i32 999999, i64 1, i32 2}
 !15 = !{!"branch_weights", i32 100}
+!16 = !{!"branch_weights", i32 1}

Modified: llvm/trunk/unittests/Analysis/ProfileSummaryInfoTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Analysis/ProfileSummaryInfoTest.cpp?rev=309964&r1=309963&r2=309964&view=diff
==============================================================================
--- llvm/trunk/unittests/Analysis/ProfileSummaryInfoTest.cpp (original)
+++ llvm/trunk/unittests/Analysis/ProfileSummaryInfoTest.cpp Thu Aug  3 10:11:41 2017
@@ -196,14 +196,18 @@ TEST_F(ProfileSummaryInfoTest, SamplePro
 
   CallSite CS1(BB1->getFirstNonPHI());
   auto *CI2 = BB2->getFirstNonPHI();
+  // Manually attach branch weights metadata to the call instruction.
+  SmallVector<uint32_t, 1> Weights;
+  Weights.push_back(1000);
+  MDBuilder MDB(M->getContext());
+  CI2->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
   CallSite CS2(CI2);
 
-  EXPECT_TRUE(PSI.isHotCallSite(CS1, &BFI));
-  EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI));
+  EXPECT_FALSE(PSI.isHotCallSite(CS1, &BFI));
+  EXPECT_TRUE(PSI.isHotCallSite(CS2, &BFI));
 
   // Test that CS2 is considered hot when it gets an MD_prof metadata with
   // weights that exceed the hot count threshold.
-  MDBuilder MDB(M->getContext());
   CI2->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights({400}));
   EXPECT_TRUE(PSI.isHotCallSite(CS2, &BFI));
 }




More information about the llvm-commits mailing list