[llvm] aa2ddfc - [SampleFDO] For functions without profiles, provide an option to put

Wei Mi via llvm-commits llvm-commits at lists.llvm.org
Fri May 8 11:30:20 PDT 2020


Author: Wei Mi
Date: 2020-05-08T11:18:09-07:00
New Revision: aa2ddfc73d6e4a3369b7992aecaf107987c505b6

URL: https://github.com/llvm/llvm-project/commit/aa2ddfc73d6e4a3369b7992aecaf107987c505b6
DIFF: https://github.com/llvm/llvm-project/commit/aa2ddfc73d6e4a3369b7992aecaf107987c505b6.diff

LOG: [SampleFDO] For functions without profiles, provide an option to put
them in a special text section.

For sampleFDO, because the optimized build uses a profile generated from a
previous release, previously we couldn't tell whether a function without a
profile was truly cold or just newly created, so we had to treat such functions
conservatively and put them in the .text section instead of .text.unlikely. The
result was that when we were pursuing the best performance by locking .text.hot
and .text in memory, we wasted a lot of memory keeping cold functions inside.

In https://reviews.llvm.org/D66374, we introduced profile symbol list to
discriminate functions being cold versus functions being newly added.
This mechanism works quite well for regular use cases in AutoFDO. However,
in some cases, we can only have a partial profile when optimizing a target.
The partial profile may be an aggregated profile collected from many targets.
The profile symbol list method used for regular sampleFDO profile is not
applicable to partial profile use case because it may be too large and
introduce many false positives.

To solve the problem for partial profile use case, we provide an option called
--profile-unknown-in-special-section. For functions without profile, we will
still treat them conservatively in compiler optimizations -- for example,
treat them as warm instead of cold in inliner. When we use profile info to
add section prefix for functions, we will discriminate functions known to be
not cold versus functions without profile (being unknown), and we will put
functions being unknown in a special text section called .text.unknown.
Runtime system will have the flexibility to decide where to put the special
section in order to achieve a balance between performance and memory saving.

Differential Revision: https://reviews.llvm.org/D62540

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/ProfileSummaryInfo.h
    llvm/lib/Analysis/ProfileSummaryInfo.cpp
    llvm/lib/CodeGen/CodeGenPrepare.cpp
    llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
index e293d069f1f1..8fbc9e8990b2 100644
--- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -72,13 +72,6 @@ class ProfileSummaryInfo {
            Summary->getKind() == ProfileSummary::PSK_Sample;
   }
 
-  /// Returns true if module \c M has partial-profile sample profile.
-  bool hasPartialSampleProfile() {
-    return hasProfileSummary() &&
-           Summary->getKind() == ProfileSummary::PSK_Sample &&
-           Summary->isPartialProfile();
-  }
-
   /// Returns true if module \c M has instrumentation profile.
   bool hasInstrumentationProfile() {
     return hasProfileSummary() &&
@@ -106,6 +99,8 @@ class ProfileSummaryInfo {
   Optional<uint64_t> getProfileCount(const CallBase &CallInst,
                                      BlockFrequencyInfo *BFI,
                                      bool AllowSynthetic = false);
+  /// Returns true if module \c M has partial-profile sample profile.
+  bool hasPartialSampleProfile();
   /// Returns true if the working set size of the code is considered huge.
   bool hasHugeWorkingSetSize();
   /// Returns true if the working set size of the code is considered large.
@@ -118,6 +113,8 @@ class ProfileSummaryInfo {
   bool isFunctionEntryCold(const Function *F);
   /// Returns true if \p F contains only cold code.
   bool isFunctionColdInCallGraph(const Function *F, BlockFrequencyInfo &BFI);
+  /// Returns true if the hotness of \p F is unknown.
+  bool isFunctionHotnessUnknown(const Function &F);
   /// Returns true if \p F contains hot code with regard to a given hot
   /// percentile cutoff value.
   bool isFunctionHotInCallGraphNthPercentile(int PercentileCutoff,

diff  --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index dd53aa78f40f..ef33b9b1de5a 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -66,6 +66,10 @@ static cl::opt<int> ProfileSummaryColdCount(
     cl::desc("A fixed cold count that overrides the count derived from"
              " profile-summary-cutoff-cold"));
 
+static cl::opt<bool> PartialProfile(
+    "partial-profile", cl::Hidden, cl::init(false),
+    cl::desc("Specify the current profile is used as a partial profile."));
+
 // Find the summary entry for a desired percentile of counts.
 static const ProfileSummaryEntry &getEntryForPercentile(SummaryEntryVector &DS,
                                                         uint64_t Percentile) {
@@ -192,6 +196,11 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(const Function *F,
   return true;
 }
 
+bool ProfileSummaryInfo::isFunctionHotnessUnknown(const Function &F) {
+  assert(hasPartialSampleProfile() && "Expect partial sample profile");
+  return !F.getEntryCount().hasValue();
+}
+
 template<bool isHot>
 bool ProfileSummaryInfo::isFunctionHotOrColdInCallGraphNthPercentile(
     int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) {
@@ -399,6 +408,12 @@ bool ProfileSummaryInfo::isColdCallSite(const CallBase &CB,
   return hasSampleProfile() && CB.getCaller()->hasProfileData();
 }
 
+bool ProfileSummaryInfo::hasPartialSampleProfile() {
+  return hasProfileSummary() &&
+         Summary->getKind() == ProfileSummary::PSK_Sample &&
+         (PartialProfile || Summary->isPartialProfile());
+}
+
 INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
                 "Profile summary info", false, true)
 

diff  --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 359618d6669a..eceee30c532f 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -177,6 +177,17 @@ static cl::opt<bool> ProfileGuidedSectionPrefix(
     "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
     cl::desc("Use profile info to add section prefix for hot/cold functions"));
 
+static cl::opt<bool> ProfileUnknownInSpecialSection(
+    "profile-unknown-in-special-section", cl::Hidden, cl::init(false),
+    cl::ZeroOrMore,
+    cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
+             "profile, we cannot tell the function is cold for sure because "
+             "it may be a function newly added without ever being sampled. "
+             "With the flag enabled, compiler can put such profile unknown "
+             "functions into a special section, so runtime system can choose "
+             "to handle it in a different way than .text section, to save "
+             "RAM for example. "));
+
 static cl::opt<unsigned> FreqRatioToSkipMerge(
     "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
     cl::desc("Skip merging empty blocks if (frequency of empty block) / "
@@ -452,6 +463,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
       F.setSectionPrefix(".hot");
     else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
       F.setSectionPrefix(".unlikely");
+    else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
+             PSI->isFunctionHotnessUnknown(F))
+      F.setSectionPrefix(".unknown");
   }
 
   /// This optimization identifies DIV instructions that can be

diff  --git a/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll b/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
index 82c407ebb9d2..6ac39a167689 100644
--- a/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
+++ b/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
@@ -1,5 +1,6 @@
 ; REQUIRES: x86-registered-target
 ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -S | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -profile-unknown-in-special-section -partial-profile -S | FileCheck %s --check-prefix UNKNOWN
 ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -codegenprepare -profile-sample-accurate -S | FileCheck %s --check-prefix ACCURATE
 
 target triple = "x86_64-pc-linux-gnu"
@@ -11,7 +12,8 @@ target triple = "x86_64-pc-linux-gnu"
 declare void @hot_func()
 
 ; CHECK-NOT: foo_not_in_profile{{.*}}!section_prefix
-; CHECK: foo_not_in_profile{{.*}}!prof ![[UNKNOWN_ID:[0-9]+]]
+; CHECK: foo_not_in_profile{{.*}}!prof ![[NOPROFILE_ID:[0-9]+]]
+; UNKNOWN: foo_not_in_profile{{.*}}!prof ![[NOPROFILE_ID:[0-9]+]] !section_prefix ![[UNKNOWN_ID:[0-9]+]]
 ; ACCURATE: foo_not_in_profile{{.*}}!prof ![[ZERO_ID:[0-9]+]] !section_prefix ![[COLD_ID:[0-9]+]]
 ; The function not appearing in profile is cold when -profile-sample-accurate
 ; is on.
@@ -31,9 +33,11 @@ define void @bar_not_in_profile() #0 {
 
 attributes #0 = { "profile-sample-accurate" }
 
-; CHECK: ![[UNKNOWN_ID]] = !{!"function_entry_count", i64 -1}
+; CHECK: ![[NOPROFILE_ID]] = !{!"function_entry_count", i64 -1}
 ; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
 ; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
+; UNKNOWN: ![[NOPROFILE_ID]] = !{!"function_entry_count", i64 -1}
+; UNKNOWN: ![[UNKNOWN_ID]] = !{!"function_section_prefix", !".unknown"}
 ; ACCURATE: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
 ; ACCURATE: ![[COLD_ID]] = !{!"function_section_prefix", !".unlikely"}
 !llvm.module.flags = !{!1}


        


More information about the llvm-commits mailing list