[llvm] r349088 - [SampleFDO] handle ProfileSampleAccurate when initializing function entry count
Wei Mi via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 13 13:51:43 PST 2018
Author: wmi
Date: Thu Dec 13 13:51:42 2018
New Revision: 349088
URL: http://llvm.org/viewvc/llvm-project?rev=349088&view=rev
Log:
[SampleFDO] handle ProfileSampleAccurate when initializing function entry count
ProfileSampleAccurate is used to indicate that the profile exactly matches the
code to be optimized.
Previously, ProfileSampleAccurate was handled in ProfileSummaryInfo::isColdCallSite
and ProfileSummaryInfo::isColdBlock. A better solution is to initialize the function
entry count to 0 when ProfileSampleAccurate is true, so we don't have to handle
ProfileSampleAccurate in multiple places.
Differential Revision: https://reviews.llvm.org/D55660
Added:
llvm/trunk/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll
llvm/trunk/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
Removed:
llvm/trunk/test/Transforms/Inline/inline-cold-callsite-samplepgo.ll
Modified:
llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp
llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp
llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll
Modified: llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp?rev=349088&r1=349087&r2=349088&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp Thu Dec 13 13:51:42 2018
@@ -39,11 +39,6 @@ static cl::opt<int> ProfileSummaryCutoff
cl::desc("A count is cold if it is below the minimum count"
" to reach this percentile of total counts."));
-static cl::opt<bool> ProfileSampleAccurate(
- "profile-sample-accurate", cl::Hidden, cl::init(false),
- cl::desc("If the sample profile is accurate, we will mark all un-sampled "
- "callsite as cold. Otherwise, treat un-sampled callsites as if "
- "we have no profile."));
static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
"profile-summary-huge-working-set-size-threshold", cl::Hidden,
cl::init(15000), cl::ZeroOrMore,
@@ -261,14 +256,7 @@ bool ProfileSummaryInfo::isHotBlock(cons
bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB,
BlockFrequencyInfo *BFI) {
auto Count = BFI->getBlockProfileCount(BB);
- if (Count)
- return isColdCount(*Count);
- if (!hasSampleProfile())
- return false;
-
- const Function *F = BB->getParent();
- return ProfileSampleAccurate ||
- (F && F->hasFnAttribute("profile-sample-accurate"));
+ return Count && isColdCount(*Count);
}
bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS,
@@ -285,11 +273,7 @@ bool ProfileSummaryInfo::isColdCallSite(
// In SamplePGO, if the caller has been sampled, and there is no profile
// annotated on the callsite, we consider the callsite as cold.
- // If there is no profile for the caller, and we know the profile is
- // accurate, we consider the callsite as cold.
- return (hasSampleProfile() &&
- (CS.getCaller()->hasProfileData() || ProfileSampleAccurate ||
- CS.getCaller()->hasFnAttribute("profile-sample-accurate")));
+ return hasSampleProfile() && CS.getCaller()->hasProfileData();
}
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
Modified: llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp?rev=349088&r1=349087&r2=349088&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp Thu Dec 13 13:51:42 2018
@@ -123,6 +123,12 @@ static cl::opt<bool> NoWarnSampleUnused(
cl::desc("Use this option to turn off/on warnings about function with "
"samples but without debug information to use those samples. "));
+static cl::opt<bool> ProfileSampleAccurate(
+ "profile-sample-accurate", cl::Hidden, cl::init(false),
+ cl::desc("If the sample profile is accurate, we will mark all un-sampled "
+ "callsite and function as having 0 samples. Otherwise, treat "
+ "un-sampled callsites and functions conservatively as unknown. "));
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -1604,10 +1610,18 @@ bool SampleProfileLoaderLegacyPass::runO
}
bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
- // Initialize the entry count to -1, which will be treated conservatively
- // by getEntryCount as the same as unknown (None). If we have samples this
- // will be overwritten in emitAnnotations.
- F.setEntryCount(ProfileCount(-1, Function::PCT_Real));
+ // By default the entry count is initialized to -1, which will be treated
+ // conservatively by getEntryCount as the same as unknown (None). This is
+ // to avoid newly added code to be treated as cold. If we have samples
+ // this will be overwritten in emitAnnotations.
+ // If ProfileSampleAccurate is true or F has profile-sample-accurate
+ // attribute, initialize the entry count to 0 so callsites or functions
+ // unsampled will be treated as cold.
+ uint64_t initialEntryCount =
+ (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
+ ? 0
+ : -1;
+ F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
if (AM) {
auto &FAM =
Modified: llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll?rev=349088&r1=349087&r2=349088&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll (original)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll Thu Dec 13 13:51:42 2018
@@ -1,19 +1,16 @@
; RUN: opt < %s -codegenprepare -S | FileCheck %s
-; RUN: opt < %s -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE
target triple = "x86_64-pc-linux-gnu"
; This tests that hot/cold functions get correct section prefix assigned
; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
-; ACCURATE: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
; The entry is hot
define void @hot_func() !prof !15 {
ret void
}
; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
-; ACCURATE: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
; The sum of 2 callsites are hot
define void @hot_call_func() !prof !16 {
call void @hot_func(), !prof !17
@@ -22,7 +19,6 @@ define void @hot_call_func() !prof !16 {
}
; CHECK-NOT: normal_func{{.*}}!section_prefix
-; ACCURATE-NOT: normal_func{{.*}}!section_prefix
; The sum of all callsites are neither hot or cold
define void @normal_func() !prof !16 {
call void @hot_func(), !prof !17
@@ -32,36 +28,12 @@ define void @normal_func() !prof !16 {
}
; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
-; ACCURATE: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
; The entry and the callsite are both cold
define void @cold_func() !prof !16 {
call void @hot_func(), !prof !18
ret void
}
-
-; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix
-; The function not appearing in profile is neither hot nor cold
-;
-; ACCURATE: foo_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
-; The function not appearing in profile is cold when -profile-sample-accurate
-; is on
-define void @foo_not_in_profile() !prof !19 {
- call void @hot_func()
- ret void
-}
-
-; CHECK: bar_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
-; ACCURATE: bar_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
-; The function not appearing in profile is cold when the func has
-; profile-sample-accurate attribute
-define void @bar_not_in_profile() #0 !prof !19 {
- call void @hot_func()
- ret void
-}
-
-attributes #0 = { "profile-sample-accurate" }
-
; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"}
; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
!llvm.module.flags = !{!1}
@@ -83,4 +55,3 @@ attributes #0 = { "profile-sample-accura
!16 = !{!"function_entry_count", i64 1}
!17 = !{!"branch_weights", i32 80}
!18 = !{!"branch_weights", i32 1}
-!19 = !{!"function_entry_count", i64 -1}
Removed: llvm/trunk/test/Transforms/Inline/inline-cold-callsite-samplepgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/inline-cold-callsite-samplepgo.ll?rev=349087&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Inline/inline-cold-callsite-samplepgo.ll (original)
+++ llvm/trunk/test/Transforms/Inline/inline-cold-callsite-samplepgo.ll (removed)
@@ -1,47 +0,0 @@
-; For SamplePGO, if -profile-sample-accurate is specified, cold callsite
-; heuristics should be honored if the caller has no profile.
-
-; RUN: opt < %s -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s
-
-define i32 @callee(i32 %x) {
- %x1 = add i32 %x, 1
- %x2 = add i32 %x1, 1
- %x3 = add i32 %x2, 1
- call void @extern()
- call void @extern()
- ret i32 %x3
-}
-
-define i32 @caller(i32 %y1) {
-; CHECK-LABEL: @caller
-; CHECK-NOT: call i32 @callee
- %y2 = call i32 @callee(i32 %y1)
- ret i32 %y2
-}
-
-define i32 @caller_accurate(i32 %y1) #0 {
-; CHECK-LABEL: @caller_accurate
-; CHECK: call i32 @callee
- %y2 = call i32 @callee(i32 %y1)
- ret i32 %y2
-}
-
-declare void @extern()
-
-attributes #0 = { "profile-sample-accurate" }
-
-!llvm.module.flags = !{!1}
-!1 = !{i32 1, !"ProfileSummary", !2}
-!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
-!3 = !{!"ProfileFormat", !"SampleProfile"}
-!4 = !{!"TotalCount", i64 10000}
-!5 = !{!"MaxCount", i64 1000}
-!6 = !{!"MaxInternalCount", i64 1}
-!7 = !{!"MaxFunctionCount", i64 1000}
-!8 = !{!"NumCounts", i64 3}
-!9 = !{!"NumFunctions", i64 3}
-!10 = !{!"DetailedSummary", !11}
-!11 = !{!12, !13, !14}
-!12 = !{i32 10000, i64 100, i32 1}
-!13 = !{i32 999000, i64 100, i32 1}
-!14 = !{i32 999999, i64 1, i32 2}
Added: llvm/trunk/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll?rev=349088&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll Thu Dec 13 13:51:42 2018
@@ -0,0 +1,31 @@
+; For SamplePGO, if -profile-sample-accurate is specified, cold callsite
+; heuristics should be honored if the caller has no profile.
+
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -profile-sample-accurate -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s --check-prefix ACCURATE
+
+declare void @extern()
+define void @callee() {
+ call void @extern()
+ ret void
+}
+
+define void @caller(i32 %y1) {
+; CHECK-LABEL: @caller
+; CHECK-NOT: call void @callee
+; ACCURATE-LABEL: @caller
+; ACCURATE: call void @callee
+ call void @callee()
+ ret void
+}
+
+define void @caller_accurate(i32 %y1) #0 {
+; CHECK-LABEL: @caller_accurate
+; CHECK: call void @callee
+; ACCURATE-LABEL: @caller_accurate
+; ACCURATE: call void @callee
+ call void @callee()
+ ret void
+}
+
+attributes #0 = { "profile-sample-accurate" }
Added: llvm/trunk/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/section-accurate-samplepgo.ll?rev=349088&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/section-accurate-samplepgo.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/section-accurate-samplepgo.ll Thu Dec 13 13:51:42 2018
@@ -0,0 +1,52 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -S | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE
+
+target triple = "x86_64-pc-linux-gnu"
+
+; The test checks that function without profile gets unlikely section prefix
+; if -profile-sample-accurate is specified or the function has the
+; profile-sample-accurate attribute.
+
+declare void @hot_func()
+
+; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix
+; CHECK: foo_not_in_profile{{.*}}!prof ![[UNKNOWN_ID:[0-9]+]]
+; ACCURATE: foo_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
+; The function not appearing in profile is cold when -profile-sample-accurate
+; is on.
+define void @foo_not_in_profile() {
+ call void @hot_func()
+ ret void
+}
+
+; CHECK: bar_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
+; ACCURATE: bar_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
+; The function not appearing in profile is cold when the func has
+; profile-sample-accurate attribute.
+define void @bar_not_in_profile() #0 {
+ call void @hot_func()
+ ret void
+}
+
+attributes #0 = { "profile-sample-accurate" }
+
+; CHECK: ![[UNKNOWN_ID]] = !{!"function_entry_count", i64 -1}
+; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
+; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
+; ACCURATE: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
+; ACCURATE: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"SampleProfile"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
More information about the llvm-commits
mailing list