[llvm] r373133 - [SampleFDO] Create a separate flag profile-accurate-for-symsinlist to handle

Wei Mi via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 27 15:33:59 PDT 2019


Author: wmi
Date: Fri Sep 27 15:33:59 2019
New Revision: 373133

URL: http://llvm.org/viewvc/llvm-project?rev=373133&view=rev
Log:
[SampleFDO] Create a separate flag profile-accurate-for-symsinlist to handle
profile symbol list.

Currently many existing users using profile-sample-accurate want to reduce
code size as much as possible. Their use cases are different from the scenario
profile symbol list tries to handle -- the major motivation of adding profile
symbol list is to get the major memory/code size saving without introduce
performance regression. So to keep the behavior of profile-sample-accurate
unchanged, we think decoupling these two things and using a new flag to
control the handling of profile symbol list may be better.

When profile-sample-accurate and the new flag profile-accurate-for-symsinlist
are both present, since profile-sample-accurate is a user assertion we let it
have a higher precedence.

Differential Revision: https://reviews.llvm.org/D68047


Modified:
    llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp
    llvm/trunk/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll
    llvm/trunk/test/Transforms/SampleProfile/profile-sample-accurate.ll
    llvm/trunk/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll

Modified: llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp?rev=373133&r1=373132&r2=373133&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp Fri Sep 27 15:33:59 2019
@@ -130,6 +130,12 @@ static cl::opt<bool> ProfileSampleAccura
              "callsite and function as having 0 samples. Otherwise, treat "
              "un-sampled callsites and functions conservatively as unknown. "));
 
+static cl::opt<bool> ProfileAccurateForSymsInList(
+    "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
+    cl::init(true),
+    cl::desc("For symbols in profile symbol list, regard their profiles to "
+             "be accurate. It may be overriden by profile-sample-accurate. "));
+
 namespace {
 
 using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -418,8 +424,12 @@ protected:
   // names, inline instance names and call target names.
   StringSet<> NamesInProfile;
 
-  // Showing whether ProfileSampleAccurate is enabled for current function.
-  bool ProfSampleAccEnabled = false;
+  // For symbol in profile symbol list, whether to regard their profiles
+  // to be accurate. It is mainly decided by existance of profile symbol
+  // list and -profile-accurate-for-symsinlist flag, but it can be
+  // overriden by -profile-sample-accurate or profile-sample-accurate
+  // attribute.
+  bool ProfAccForSymsInList;
 };
 
 class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -476,7 +486,8 @@ private:
 /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is
 /// regarded as hot if the count is above the cutoff value.
 ///
-/// When profile-sample-accurate is enabled, functions without profile will
+/// When ProfileAccurateForSymsInList is enabled and profile symbol list
+/// is present, functions in the profile symbol list but without profile will
 /// be regarded as cold and much less inlining will happen in CGSCC inlining
 /// pass, so we tend to lower the hot criteria here to allow more early
 /// inlining to happen for warm callsites and it is helpful for performance.
@@ -487,7 +498,7 @@ bool SampleProfileLoader::callsiteIsHot(
 
   assert(PSI && "PSI is expected to be non null");
   uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
-  if (ProfSampleAccEnabled)
+  if (ProfAccForSymsInList)
     return !PSI->isColdCount(CallsiteTotalSamples);
   else
     return PSI->isHotCount(CallsiteTotalSamples);
@@ -890,6 +901,14 @@ bool SampleProfileLoader::inlineHotFunct
     Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
   DenseSet<Instruction *> PromotedInsns;
 
+  // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
+  // Profile symbol list is ignored when profile-sample-accurate is on.
+  assert((!ProfAccForSymsInList ||
+          (!ProfileSampleAccurate &&
+           !F.hasFnAttribute("profile-sample-accurate"))) &&
+         "ProfAccForSymsInList should be false when profile-sample-accurate "
+         "is enabled");
+
   DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
   bool Changed = false;
   while (true) {
@@ -1667,7 +1686,10 @@ bool SampleProfileLoader::doInitializati
   ProfileIsValid = (Reader->read() == sampleprof_error::success);
   PSL = Reader->getProfileSymbolList();
 
-  if (ProfileSampleAccurate) {
+  // While profile-sample-accurate is on, ignore symbol list.
+  ProfAccForSymsInList =
+      ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate;
+  if (ProfAccForSymsInList) {
     NamesInProfile.clear();
     if (auto NameTable = Reader->getNameTable())
       NamesInProfile.insert(NameTable->begin(), NameTable->end());
@@ -1765,37 +1787,38 @@ bool SampleProfileLoader::runOnFunction(
   // this will be overwritten in emitAnnotations.
   uint64_t initialEntryCount = -1;
 
-  ProfSampleAccEnabled =
-      ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate");
-  if (ProfSampleAccEnabled) {
-    // PSL -- profile symbol list include all the symbols in sampled binary.
-    // It is used to prevent new functions to be treated as cold.
-    if (PSL) {
-      // Profile symbol list is available, initialize the entry count to 0
-      // only for functions in the list.
-      if (PSL->contains(F.getName()))
-        initialEntryCount = 0;
-
-      // When ProfileSampleAccurate is true, function without sample will be
-      // regarded as cold. To minimize the potential negative performance
-      // impact it could have, we want to be a little conservative here
-      // saying if a function shows up in the profile, no matter as outline
-      // function, inline instance or call targets, treat the function as not
-      // being cold. This will handle the cases such as most callsites of a
-      // function are inlined in sampled binary but not inlined in current
-      // build (because of source code drift, imprecise debug information, or
-      // the callsites are all cold individually but not cold
-      // accumulatively...), so the outline function showing up as cold in
-      // sampled binary will actually not be cold after current build.
-      StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
-      if (NamesInProfile.count(CanonName))
-        initialEntryCount = -1;
-    } else {
-      // If there is no profile symbol list available, initialize all the
-      // function entry counts to 0. It means all the functions without
-      // profile will be regarded as cold.
+  ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
+  if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
+    // initialize all the function entry counts to 0. It means all the
+    // functions without profile will be regarded as cold.
+    initialEntryCount = 0;
+    // profile-sample-accurate is a user assertion which has a higher precedence
+    // than symbol list. When profile-sample-accurate is on, ignore symbol list.
+    ProfAccForSymsInList = false;
+  }
+
+  // PSL -- profile symbol list include all the symbols in sampled binary.
+  // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
+  // old functions without samples being cold, without having to worry
+  // about new and hot functions being mistakenly treated as cold.
+  if (ProfAccForSymsInList) {
+    // Initialize the entry count to 0 for functions in the list.
+    if (PSL->contains(F.getName()))
       initialEntryCount = 0;
-    }
+
+    // Function in the symbol list but without sample will be regarded as
+    // cold. To minimize the potential negative performance impact it could
+    // have, we want to be a little conservative here saying if a function
+    // shows up in the profile, no matter as outline function, inline instance
+    // or call targets, treat the function as not being cold. This will handle
+    // the cases such as most callsites of a function are inlined in sampled
+    // binary but not inlined in current build (because of source code drift,
+    // imprecise debug information, or the callsites are all cold individually
+    // but not cold accumulatively...), so the outline function showing up as
+    // cold in sampled binary will actually not be cold after current build.
+    StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+    if (NamesInProfile.count(CanonName))
+      initialEntryCount = -1;
   }
 
   F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));

Modified: llvm/trunk/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll?rev=373133&r1=373132&r2=373133&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll (original)
+++ llvm/trunk/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll Fri Sep 27 15:33:59 2019
@@ -1,5 +1,5 @@
 ; REQUIRES: zlib
 ; Append inline.prof with profile symbol list and save it after compression.
 ; RUN: llvm-profdata merge --sample --prof-sym-list=%S/Inputs/profile-symbol-list.text --compress-prof-sym-list=true --extbinary %S/Inputs/inline.prof --output=%t.profdata
-; RUN: opt < %S/Inputs/profile-symbol-list.ll -sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll
-; RUN: opt < %S/Inputs/profile-symbol-list.ll -passes=sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll
+; RUN: opt < %S/Inputs/profile-symbol-list.ll -sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll
+; RUN: opt < %S/Inputs/profile-symbol-list.ll -passes=sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll

Modified: llvm/trunk/test/Transforms/SampleProfile/profile-sample-accurate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/profile-sample-accurate.ll?rev=373133&r1=373132&r2=373133&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/profile-sample-accurate.ll (original)
+++ llvm/trunk/test/Transforms/SampleProfile/profile-sample-accurate.ll Fri Sep 27 15:33:59 2019
@@ -1,8 +1,18 @@
-; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT
+
 ; RUN: llvm-profdata merge -sample -extbinary -prof-sym-list=%S/Inputs/profile-symbol-list.text %S/Inputs/profsampleacc.extbinary.afdo -o %t.symlist.afdo
-; RUN: opt < %s -sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=PROFSYMLIST
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=PROFSYMLIST
+; RUN: opt < %s -sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=PROFSYMLIST
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=PROFSYMLIST
+;
+; If -profile-accurate-for-symsinlist and -profile-sample-accurate both present,
+; -profile-sample-accurate will override -profile-accurate-for-symsinlist.
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT
 ;
 ; Original C++ test case
 ;
@@ -22,13 +32,21 @@
 ;
 @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
 
-; Check _Z3sumii's function entry count will be intialized to 0 if no profile
-; symbol list is available.
-; If symbol list is available, _Z3sumii's function entry count will be
-; initialized to -1 if it shows up in the profile.
+; Check _Z3sumii's function entry count will be 0 when
+; profile-sample-accurate is enabled.
+; CALL_SUM_IS_HOT: define i32 @_Z3sumii{{.*}}!prof ![[ZERO_ID:[0-9]+]]
 ;
-; CHECK: define i32 @_Z3sumii{{.*}}!prof ![[ZERO_ID:[0-9]+]]
+; Check _Z3sumii's function entry count will be nonzero when
+; profile-sample-accurate is enabled because the callsite is warm and not
+; inlined so its function entry count is adjusted to nonzero.
+; CALL_SUM_IS_WARM: define i32 @_Z3sumii{{.*}}!prof ![[NONZERO_ID:[0-9]+]]
+;
+; Check _Z3sumii's function entry count will be initialized to -1 when
+; profile-accurate-for-profsymlist is enabled and _Z3sumii exists in the
+; profile symbol list because it also shows up in the profile as inline
+; instance.
 ; PROFSYMLIST: define i32 @_Z3sumii{{.*}}!prof ![[UNKNOWN_ID:[0-9]+]]
+;
 ; Function Attrs: nounwind uwtable
 define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 {
 entry:
@@ -65,11 +83,21 @@ while.body:
   br i1 %cmp1, label %if.then, label %if.else, !dbg !16
 
 ; With the hot cutoff being set to 600000, the inline instance of _Z3sumii
-; in main is neither hot nor cold. Check it will still be inlined when
+; in main is neither hot nor cold. Check it won't be inlined when
 ; profile-sample-accurate is enabled.
-; CHECK: if.then:
-; CHECK-NOT: call i32 @_Z3sumii
-; CHECK: if.else:
+; CALL_SUM_IS_WARM: if.then:
+; CALL_SUM_IS_WARM: call i32 @_Z3sumii
+; CALL_SUM_IS_WARM: if.else:
+;
+; With the hot cutoff being set to 900000, the inline instance of _Z3sumii
+; in main is hot. Check the callsite of _Z3sumii will be inlined when
+; profile-sample-accurate is enabled.
+; CALL_SUM_IS_HOT: if.then:
+; CALL_SUM_IS_HOT-NOT: call i32 @_Z3sumii
+; CALL_SUM_IS_HOT: if.else:
+;
+; Check _Z3sumii will be inlined when profile-accurate-for-profsymlist is
+; enabled
 ; PROFSYMLIST: if.then:
 ; PROFSYMLIST-NOT: call i32 @_Z3sumii
 ; PROFSYMLIST: if.else:
@@ -99,7 +127,8 @@ declare i32 @printf(i8*, ...) #2
 !llvm.module.flags = !{!8, !9}
 !llvm.ident = !{!10}
 
-; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
+; CALL_SUM_IS_HOT: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
+; CALL_SUM_IS_WARM: ![[NONZERO_ID]] = !{!"function_entry_count", i64 5179}
 ; PROFSYMLIST: ![[UNKNOWN_ID]] = !{!"function_entry_count", i64 -1}
 !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
 !1 = !DIFile(filename: "calls.cc", directory: ".")

Modified: llvm/trunk/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll?rev=373133&r1=373132&r2=373133&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll (original)
+++ llvm/trunk/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll Fri Sep 27 15:33:59 2019
@@ -1,4 +1,4 @@
 ; Append inline.prof with profile symbol list and save it without compression.
 ; RUN: llvm-profdata merge --sample --prof-sym-list=%S/Inputs/profile-symbol-list.text --compress-prof-sym-list=false --extbinary %S/Inputs/inline.prof --output=%t.profdata
-; RUN: opt < %S/Inputs/profile-symbol-list.ll -sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll
-; RUN: opt < %S/Inputs/profile-symbol-list.ll -passes=sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll
+; RUN: opt < %S/Inputs/profile-symbol-list.ll -sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll
+; RUN: opt < %S/Inputs/profile-symbol-list.ll -passes=sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll




More information about the llvm-commits mailing list