[llvm] r309964 - Do not want to use BFI to get profile count for sample pgo
Dehao Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 3 10:11:41 PDT 2017
Author: dehao
Date: Thu Aug 3 10:11:41 2017
New Revision: 309964
URL: http://llvm.org/viewvc/llvm-project?rev=309964&view=rev
Log:
Do not want to use BFI to get profile count for sample pgo
Summary: For SamplePGO, we already record the callsite count in the call instruction itself. So we do not want to use BFI to get profile count as it is less accurate.
Reviewers: tejohnson, davidxl, eraman
Reviewed By: eraman
Subscribers: sanjoy, llvm-commits, mehdi_amini
Differential Revision: https://reviews.llvm.org/D36025
Added:
llvm/trunk/test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll
Modified:
llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp
llvm/trunk/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
llvm/trunk/unittests/Analysis/ProfileSummaryInfoTest.cpp
Modified: llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp?rev=309964&r1=309963&r2=309964&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp Thu Aug 3 10:11:41 2017
@@ -39,6 +39,12 @@ static cl::opt<int> ProfileSummaryCutoff
cl::desc("A count is cold if it is below the minimum count"
" to reach this percentile of total counts."));
+static cl::opt<bool> AccurateSampleProfile(
+ "accurate-sample-profile", cl::Hidden, cl::init(false),
+ cl::desc("If the sample profile is accurate, we will mark all un-sampled "
+ "callsite as cold. Otherwise, treat un-sampled callsites as if "
+ "we have no profile."));
+
// Find the minimum count to reach a desired percentile of counts.
static uint64_t getMinCountForPercentile(SummaryEntryVector &DS,
uint64_t Percentile) {
@@ -78,10 +84,12 @@ ProfileSummaryInfo::getProfileCount(cons
if (hasSampleProfile()) {
// In sample PGO mode, check if there is a profile metadata on the
// instruction. If it is present, determine hotness solely based on that,
- // since the sampled entry count may not be accurate.
+ // since the sampled entry count may not be accurate. If there is no
+ // profile annotated on the instruction, return None.
uint64_t TotalCount;
if (Inst->extractProfTotalWeight(TotalCount))
return TotalCount;
+ return None;
}
if (BFI)
return BFI->getBlockProfileCount(Inst->getParent());
@@ -199,7 +207,15 @@ bool ProfileSummaryInfo::isHotCallSite(c
bool ProfileSummaryInfo::isColdCallSite(const CallSite &CS,
BlockFrequencyInfo *BFI) {
auto C = getProfileCount(CS.getInstruction(), BFI);
- return C && isColdCount(*C);
+ if (C)
+ return isColdCount(*C);
+
+ // In SamplePGO, if the caller has been sampled, and there is no profile
+ // annotated on the callsite, we consider the callsite as cold.
+ // If there is no profile for the caller, and we know the profile is
+ // accurate, we consider the callsite as cold.
+ return (hasSampleProfile() &&
+ (CS.getCaller()->getEntryCount() || AccurateSampleProfile));
}
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
Added: llvm/trunk/test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll?rev=309964&view=auto
==============================================================================
--- llvm/trunk/test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll (added)
+++ llvm/trunk/test/Bitcode/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll Thu Aug 3 10:11:41 2017
@@ -0,0 +1,31 @@
+; ModuleID = 'thinlto-function-summary-callgraph-profile-summary2.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @hot1() #1 {
+ ret void
+}
+define void @hot2() #1 {
+ ret void
+}
+define void @hot3() #1 {
+ ret void
+}
+define void @cold1() #1 {
+ ret void
+}
+define void @cold2() #1 {
+ ret void
+}
+define void @cold3() #1 {
+ ret void
+}
+define void @none1() #1 {
+ ret void
+}
+define void @none2() #1 {
+ ret void
+}
+define void @none3() #1 {
+ ret void
+}
Modified: llvm/trunk/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll?rev=309964&r1=309963&r2=309964&view=diff
==============================================================================
--- llvm/trunk/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll (original)
+++ llvm/trunk/test/Bitcode/thinlto-function-summary-callgraph-sample-profile-summary.ll Thu Aug 3 10:11:41 2017
@@ -1,7 +1,7 @@
; Test to check the callgraph in summary when there is PGO
; RUN: opt -module-summary %s -o %t.o
; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
-; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph-profile-summary.ll -o %t2.o
+; RUN: opt -module-summary %p/Inputs/thinlto-function-summary-callgraph-sample-profile-summary.ll -o %t2.o
; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o
; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED
@@ -16,24 +16,26 @@
; "hot3"
; CHECK-NEXT: <FUNCTION op0=20 op1=4
; "hot4"
-; CHECK-NEXT: <FUNCTION op0=24 op1=4
+; CHECK-NEXT: <FUNCTION op0=24 op1=5
; "cold"
-; CHECK-NEXT: <FUNCTION op0=28 op1=4
+; CHECK-NEXT: <FUNCTION op0=29 op1=5
; "none1"
-; CHECK-NEXT: <FUNCTION op0=32 op1=5
+; CHECK-NEXT: <FUNCTION op0=34 op1=5
; "none2"
-; CHECK-NEXT: <FUNCTION op0=37 op1=5
+; CHECK-NEXT: <FUNCTION op0=39 op1=5
; "none3"
-; CHECK-NEXT: <FUNCTION op0=42 op1=5
+; CHECK-NEXT: <FUNCTION op0=44 op1=5
+; CHECK-NEXT: <FUNCTION op0=49 op1=5
+
; CHECK-LABEL: <GLOBALVAL_SUMMARY_BLOCK
; CHECK-NEXT: <VERSION
-; CHECK-NEXT: <VALUE_GUID op0=25 op1=123/>
-; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123
-; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=1 op5=3 op6=5 op7=1 op8=2 op9=3 op10=4 op11=3 op12=6 op13=2 op14=3 op15=3 op16=7 op17=2 op18=8 op19=2 op20=25 op21=4/>
+; CHECK-NEXT: <VALUE_GUID op0=26 op1=123/>
+; op4=none1 op6=hot1 op8=cold1 op10=none2 op12=hot2 op14=cold2 op16=none3 op18=hot3 op20=cold3 op22=123
+; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op4=7 op5=0 op6=1 op7=3 op8=4 op9=1 op10=8 op11=0 op12=2 op13=3 op14=5 op15=1 op16=9 op17=0 op18=3 op19=3 op20=6 op21=1 op22=26 op23=4/>
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
; CHECK: <STRTAB_BLOCK
-; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3hot4coldnone1none2none3{{.*}}'
+; CHECK-NEXT: blob data = 'hot_functionhot1hot2hot3cold1cold2cold3none1none2none3{{.*}}'
; COMBINED: <GLOBALVAL_SUMMARY_BLOCK
; COMBINED-NEXT: <VERSION
@@ -45,13 +47,17 @@
; COMBINED-NEXT: <VALUE_GUID
; COMBINED-NEXT: <VALUE_GUID
; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <VALUE_GUID
+; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
; COMBINED-NEXT: <COMBINED abbrevid=
-; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op5=[[HOT1:.*]] op6=3 op7=[[COLD:.*]] op8=1 op9=[[HOT2:.*]] op10=3 op11=[[NONE1:.*]] op12=2 op13=[[HOT3:.*]] op14=3 op15=[[NONE2:.*]] op16=2 op17=[[NONE3:.*]] op18=2/>
+; COMBINED-NEXT: <COMBINED abbrevid=
+; COMBINED-NEXT: <COMBINED_PROFILE {{.*}} op5=[[NONE1:.*]] op6=0 op7=[[HOT1:.*]] op8=3 op9=[[COLD1:.*]] op10=1 op11=[[NONE2:.*]] op12=0 op13=[[HOT2:.*]] op14=3 op15=[[COLD2:.*]] op16=1 op17=[[NONE3:.*]] op18=0 op19=[[HOT3:.*]] op20=3 op21=[[COLD3:.*]] op22=1/>
; COMBINED_NEXT: <COMBINED abbrevid=
; COMBINED_NEXT: </GLOBALVAL_SUMMARY_BLOCK>
@@ -63,24 +69,19 @@ target triple = "x86_64-unknown-linux-gn
; This function have high profile count, so entry block is hot.
define void @hot_function(i1 %a, i1 %a2) !prof !20 {
entry:
- call void @hot1()
- br i1 %a, label %Cold, label %Hot, !prof !41
-Cold: ; 1/1000 goes here
- call void @cold()
- call void @hot2()
- call void @hot4(), !prof !15
- call void @none1()
- br label %exit
-Hot: ; 999/1000 goes here
- call void @hot2()
- call void @hot3()
- br i1 %a2, label %None1, label %None2, !prof !42
-None1: ; half goes here
call void @none1()
+ call void @hot1(), !prof !15
+ call void @cold1(), !prof !16
+ br i1 %a, label %Cold, label %Hot, !prof !41
+Cold: ; 1/1000 goes here
call void @none2()
+ call void @hot2(), !prof !15
+ call void @cold2(), !prof !16
br label %exit
-None2: ; half goes here
+Hot: ; 999/1000 goes here
call void @none3()
+ call void @hot3(), !prof !15
+ call void @cold3(), !prof !16
br label %exit
exit:
ret void
@@ -89,17 +90,14 @@ exit:
declare void @hot1() #1
declare void @hot2() #1
declare void @hot3() #1
-declare void @hot4() #1
-declare void @cold() #1
+declare void @cold1() #1
+declare void @cold2() #1
+declare void @cold3() #1
declare void @none1() #1
declare void @none2() #1
declare void @none3() #1
-
!41 = !{!"branch_weights", i32 1, i32 1000}
-!42 = !{!"branch_weights", i32 1, i32 1}
-
-
!llvm.module.flags = !{!1}
!20 = !{!"function_entry_count", i64 110, i64 123}
@@ -119,3 +117,4 @@ declare void @none3() #1
!13 = !{i32 999000, i64 100, i32 1}
!14 = !{i32 999999, i64 1, i32 2}
!15 = !{!"branch_weights", i32 100}
+!16 = !{!"branch_weights", i32 1}
Modified: llvm/trunk/unittests/Analysis/ProfileSummaryInfoTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Analysis/ProfileSummaryInfoTest.cpp?rev=309964&r1=309963&r2=309964&view=diff
==============================================================================
--- llvm/trunk/unittests/Analysis/ProfileSummaryInfoTest.cpp (original)
+++ llvm/trunk/unittests/Analysis/ProfileSummaryInfoTest.cpp Thu Aug 3 10:11:41 2017
@@ -196,14 +196,18 @@ TEST_F(ProfileSummaryInfoTest, SamplePro
CallSite CS1(BB1->getFirstNonPHI());
auto *CI2 = BB2->getFirstNonPHI();
+ // Manually attach branch weights metadata to the call instruction.
+ SmallVector<uint32_t, 1> Weights;
+ Weights.push_back(1000);
+ MDBuilder MDB(M->getContext());
+ CI2->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
CallSite CS2(CI2);
- EXPECT_TRUE(PSI.isHotCallSite(CS1, &BFI));
- EXPECT_FALSE(PSI.isHotCallSite(CS2, &BFI));
+ EXPECT_FALSE(PSI.isHotCallSite(CS1, &BFI));
+ EXPECT_TRUE(PSI.isHotCallSite(CS2, &BFI));
// Test that CS2 is considered hot when it gets an MD_prof metadata with
// weights that exceed the hot count threshold.
- MDBuilder MDB(M->getContext());
CI2->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights({400}));
EXPECT_TRUE(PSI.isHotCallSite(CS2, &BFI));
}
More information about the llvm-commits
mailing list