[llvm] r349088 - [SampleFDO] handle ProfileSampleAccurate when initializing function entry count

Wei Mi via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 13 13:51:43 PST 2018


Author: wmi
Date: Thu Dec 13 13:51:42 2018
New Revision: 349088

URL: http://llvm.org/viewvc/llvm-project?rev=349088&view=rev
Log:
[SampleFDO] handle ProfileSampleAccurate when initializing function entry count

ProfileSampleAccurate is used to indicate that the profile exactly matches the
code to be optimized.

Previously, ProfileSampleAccurate was handled in
ProfileSummaryInfo::isColdCallSite and ProfileSummaryInfo::isColdBlock. A better
solution is to initialize the function entry count to 0 when
ProfileSampleAccurate is true, so we don't have to handle ProfileSampleAccurate
in multiple places.

Differential Revision: https://reviews.llvm.org/D55660

Added:
    llvm/trunk/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll
    llvm/trunk/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
Removed:
    llvm/trunk/test/Transforms/Inline/inline-cold-callsite-samplepgo.ll
Modified:
    llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp
    llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp
    llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll

Modified: llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp?rev=349088&r1=349087&r2=349088&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/ProfileSummaryInfo.cpp Thu Dec 13 13:51:42 2018
@@ -39,11 +39,6 @@ static cl::opt<int> ProfileSummaryCutoff
     cl::desc("A count is cold if it is below the minimum count"
              " to reach this percentile of total counts."));
 
-static cl::opt<bool> ProfileSampleAccurate(
-    "profile-sample-accurate", cl::Hidden, cl::init(false),
-    cl::desc("If the sample profile is accurate, we will mark all un-sampled "
-             "callsite as cold. Otherwise, treat un-sampled callsites as if "
-             "we have no profile."));
 static cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
     "profile-summary-huge-working-set-size-threshold", cl::Hidden,
     cl::init(15000), cl::ZeroOrMore,
@@ -261,14 +256,7 @@ bool ProfileSummaryInfo::isHotBlock(cons
 bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB,
                                   BlockFrequencyInfo *BFI) {
   auto Count = BFI->getBlockProfileCount(BB);
-  if (Count)
-    return isColdCount(*Count);
-  if (!hasSampleProfile())
-    return false;
-
-  const Function *F = BB->getParent();
-  return ProfileSampleAccurate ||
-         (F && F->hasFnAttribute("profile-sample-accurate"));
+  return Count && isColdCount(*Count);
 }
 
 bool ProfileSummaryInfo::isHotCallSite(const CallSite &CS,
@@ -285,11 +273,7 @@ bool ProfileSummaryInfo::isColdCallSite(
 
   // In SamplePGO, if the caller has been sampled, and there is no profile
   // annotated on the callsite, we consider the callsite as cold.
-  // If there is no profile for the caller, and we know the profile is
-  // accurate, we consider the callsite as cold.
-  return (hasSampleProfile() &&
-          (CS.getCaller()->hasProfileData() || ProfileSampleAccurate ||
-           CS.getCaller()->hasFnAttribute("profile-sample-accurate")));
+  return hasSampleProfile() && CS.getCaller()->hasProfileData();
 }
 
 INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",

Modified: llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp?rev=349088&r1=349087&r2=349088&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp Thu Dec 13 13:51:42 2018
@@ -123,6 +123,12 @@ static cl::opt<bool> NoWarnSampleUnused(
     cl::desc("Use this option to turn off/on warnings about function with "
              "samples but without debug information to use those samples. "));
 
+static cl::opt<bool> ProfileSampleAccurate(
+    "profile-sample-accurate", cl::Hidden, cl::init(false),
+    cl::desc("If the sample profile is accurate, we will mark all un-sampled "
+             "callsite and function as having 0 samples. Otherwise, treat "
+             "un-sampled callsites and functions conservatively as unknown. "));
+
 namespace {
 
 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -1604,10 +1610,18 @@ bool SampleProfileLoaderLegacyPass::runO
 }
 
 bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
-  // Initialize the entry count to -1, which will be treated conservatively
-  // by getEntryCount as the same as unknown (None). If we have samples this
-  // will be overwritten in emitAnnotations.
-  F.setEntryCount(ProfileCount(-1, Function::PCT_Real));
+  // By default the entry count is initialized to -1, which will be treated
+  // conservatively by getEntryCount as the same as unknown (None). This is
+  // to avoid newly added code to be treated as cold. If we have samples
+  // this will be overwritten in emitAnnotations.
+  // If ProfileSampleAccurate is true or F has profile-sample-accurate
+  // attribute, initialize the entry count to 0 so callsites or functions
+  // unsampled will be treated as cold.
+  uint64_t initialEntryCount =
+      (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
+          ? 0
+          : -1;
+  F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
   std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
   if (AM) {
     auto &FAM =

Modified: llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll?rev=349088&r1=349087&r2=349088&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll (original)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/section-samplepgo.ll Thu Dec 13 13:51:42 2018
@@ -1,19 +1,16 @@
 ; RUN: opt < %s -codegenprepare -S | FileCheck %s
-; RUN: opt < %s -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE
 
 target triple = "x86_64-pc-linux-gnu"
 
 ; This tests that hot/cold functions get correct section prefix assigned
 
 ; CHECK: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
-; ACCURATE: hot_func{{.*}}!section_prefix ![[HOT_ID:[0-9]+]]
 ; The entry is hot
 define void @hot_func() !prof !15 {
   ret void
 }
 
 ; CHECK: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
-; ACCURATE: hot_call_func{{.*}}!section_prefix ![[HOT_ID]]
 ; The sum of 2 callsites are hot
 define void @hot_call_func() !prof !16 {
   call void @hot_func(), !prof !17
@@ -22,7 +19,6 @@ define void @hot_call_func() !prof !16 {
 }
 
 ; CHECK-NOT: normal_func{{.*}}!section_prefix
-; ACCURATE-NOT: normal_func{{.*}}!section_prefix
 ; The sum of all callsites are neither hot or cold
 define void @normal_func() !prof !16 {
   call void @hot_func(), !prof !17
@@ -32,36 +28,12 @@ define void @normal_func() !prof !16 {
 }
 
 ; CHECK: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
-; ACCURATE: cold_func{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
 ; The entry and the callsite are both cold
 define void @cold_func() !prof !16 {
   call void @hot_func(), !prof !18
   ret void
 }
 
-
-; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix
-; The function not appearing in profile is neither hot nor cold
-;
-; ACCURATE: foo_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
-; The function not appearing in profile is cold when -profile-sample-accurate
-; is on
-define void @foo_not_in_profile() !prof !19 {
-  call void @hot_func()
-  ret void
-}
-
-; CHECK: bar_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
-; ACCURATE: bar_not_in_profile{{.*}}!section_prefix ![[COLD_ID:[0-9]+]]
-; The function not appearing in profile is cold when the func has
-; profile-sample-accurate attribute
-define void @bar_not_in_profile() #0 !prof !19 {
-  call void @hot_func()
-  ret void
-}
-
-attributes #0 = { "profile-sample-accurate" }
-
 ; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !".hot"}
 ; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
 !llvm.module.flags = !{!1}
@@ -83,4 +55,3 @@ attributes #0 = { "profile-sample-accura
 !16 = !{!"function_entry_count", i64 1}
 !17 = !{!"branch_weights", i32 80}
 !18 = !{!"branch_weights", i32 1}
-!19 = !{!"function_entry_count", i64 -1}

Removed: llvm/trunk/test/Transforms/Inline/inline-cold-callsite-samplepgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/inline-cold-callsite-samplepgo.ll?rev=349087&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Inline/inline-cold-callsite-samplepgo.ll (original)
+++ llvm/trunk/test/Transforms/Inline/inline-cold-callsite-samplepgo.ll (removed)
@@ -1,47 +0,0 @@
-; For SamplePGO, if -profile-sample-accurate is specified, cold callsite
-; heuristics should be honored if the caller has no profile.
-
-; RUN: opt < %s -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s
-
-define i32 @callee(i32 %x) {
-  %x1 = add i32 %x, 1
-  %x2 = add i32 %x1, 1
-  %x3 = add i32 %x2, 1
-  call void @extern()
-  call void @extern()
-  ret i32 %x3
-}
-
-define i32 @caller(i32 %y1) {
-; CHECK-LABEL: @caller
-; CHECK-NOT: call i32 @callee
-  %y2 = call i32 @callee(i32 %y1)
-  ret i32 %y2
-}
-
-define i32 @caller_accurate(i32 %y1) #0 {
-; CHECK-LABEL: @caller_accurate
-; CHECK: call i32 @callee
-  %y2 = call i32 @callee(i32 %y1)
-  ret i32 %y2
-}
-
-declare void @extern()
-
-attributes #0 = { "profile-sample-accurate" }
-
-!llvm.module.flags = !{!1}
-!1 = !{i32 1, !"ProfileSummary", !2}
-!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
-!3 = !{!"ProfileFormat", !"SampleProfile"}
-!4 = !{!"TotalCount", i64 10000}
-!5 = !{!"MaxCount", i64 1000}
-!6 = !{!"MaxInternalCount", i64 1}
-!7 = !{!"MaxFunctionCount", i64 1000}
-!8 = !{!"NumCounts", i64 3}
-!9 = !{!"NumFunctions", i64 3}
-!10 = !{!"DetailedSummary", !11}
-!11 = !{!12, !13, !14}
-!12 = !{i32 10000, i64 100, i32 1}
-!13 = !{i32 999000, i64 100, i32 1}
-!14 = !{i32 999999, i64 1, i32 2}

Added: llvm/trunk/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll?rev=349088&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/inline-cold-callsite-samplepgo.ll Thu Dec 13 13:51:42 2018
@@ -0,0 +1,31 @@
+; For SamplePGO, if -profile-sample-accurate is specified, cold callsite
+; heuristics should be honored if the caller has no profile.
+
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -profile-sample-accurate -inline -S -inline-cold-callsite-threshold=0 | FileCheck %s --check-prefix ACCURATE
+
+declare void @extern()
+define void @callee() {
+  call void @extern()
+  ret void
+}
+
+define void @caller(i32 %y1) {
+; CHECK-LABEL: @caller
+; CHECK-NOT: call void @callee
+; ACCURATE-LABEL: @caller
+; ACCURATE: call void @callee
+  call void @callee()
+  ret void
+}
+
+define void @caller_accurate(i32 %y1) #0 {
+; CHECK-LABEL: @caller_accurate
+; CHECK: call void @callee
+; ACCURATE-LABEL: @caller_accurate
+; ACCURATE: call void @callee
+  call void @callee()
+  ret void
+}
+
+attributes #0 = { "profile-sample-accurate" }

Added: llvm/trunk/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/section-accurate-samplepgo.ll?rev=349088&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/section-accurate-samplepgo.ll (added)
+++ llvm/trunk/test/Transforms/SampleProfile/section-accurate-samplepgo.ll Thu Dec 13 13:51:42 2018
@@ -0,0 +1,52 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -S | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE
+
+target triple = "x86_64-pc-linux-gnu"
+
+; The test checks that function without profile gets unlikely section prefix
+; if -profile-sample-accurate is specified or the function has the
+; profile-sample-accurate attribute.
+
+declare void @hot_func()
+
+; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix
+; CHECK: foo_not_in_profile{{.*}}!prof ![[UNKNOWN_ID:[0-9]+]]
+; ACCURATE: foo_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
+; The function not appearing in profile is cold when -profile-sample-accurate
+; is on.
+define void @foo_not_in_profile() {
+  call void @hot_func()
+  ret void
+}
+
+; CHECK: bar_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
+; ACCURATE: bar_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
+; The function not appearing in profile is cold when the func has
+; profile-sample-accurate attribute.
+define void @bar_not_in_profile() #0 {
+  call void @hot_func()
+  ret void
+}
+
+attributes #0 = { "profile-sample-accurate" }
+
+; CHECK: ![[UNKNOWN_ID]] = !{!"function_entry_count", i64 -1}
+; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
+; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
+; ACCURATE: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
+; ACCURATE: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"SampleProfile"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}




More information about the llvm-commits mailing list