[llvm] r373133 - [SampleFDO] Create a separate flag profile-accurate-for-symsinlist to handle
Wei Mi via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 27 15:33:59 PDT 2019
Author: wmi
Date: Fri Sep 27 15:33:59 2019
New Revision: 373133
URL: http://llvm.org/viewvc/llvm-project?rev=373133&view=rev
Log:
[SampleFDO] Create a separate flag profile-accurate-for-symsinlist to handle
profile symbol list.
Currently many existing users using profile-sample-accurate want to reduce
code size as much as possible. Their use cases are different from the scenario
profile symbol list tries to handle -- the major motivation of adding profile
symbol list is to get the major memory/code size saving without introduce
performance regression. So to keep the behavior of profile-sample-accurate
unchanged, we think decoupling these two things and using a new flag to
control the handling of profile symbol list may be better.
When profile-sample-accurate and the new flag profile-accurate-for-symsinlist
are both present, since profile-sample-accurate is a user assertion we let it
have a higher precedence.
Differential Revision: https://reviews.llvm.org/D68047
Modified:
llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp
llvm/trunk/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll
llvm/trunk/test/Transforms/SampleProfile/profile-sample-accurate.ll
llvm/trunk/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll
Modified: llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp?rev=373133&r1=373132&r2=373133&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/SampleProfile.cpp Fri Sep 27 15:33:59 2019
@@ -130,6 +130,12 @@ static cl::opt<bool> ProfileSampleAccura
"callsite and function as having 0 samples. Otherwise, treat "
"un-sampled callsites and functions conservatively as unknown. "));
+static cl::opt<bool> ProfileAccurateForSymsInList(
+ "profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
+ cl::init(true),
+ cl::desc("For symbols in profile symbol list, regard their profiles to "
+ "be accurate. It may be overriden by profile-sample-accurate. "));
+
namespace {
using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
@@ -418,8 +424,12 @@ protected:
// names, inline instance names and call target names.
StringSet<> NamesInProfile;
- // Showing whether ProfileSampleAccurate is enabled for current function.
- bool ProfSampleAccEnabled = false;
+ // For symbol in profile symbol list, whether to regard their profiles
+ // to be accurate. It is mainly decided by existance of profile symbol
+ // list and -profile-accurate-for-symsinlist flag, but it can be
+ // overriden by -profile-sample-accurate or profile-sample-accurate
+ // attribute.
+ bool ProfAccForSymsInList;
};
class SampleProfileLoaderLegacyPass : public ModulePass {
@@ -476,7 +486,8 @@ private:
/// sample count with the hot cutoff computed by ProfileSummaryInfo, it is
/// regarded as hot if the count is above the cutoff value.
///
-/// When profile-sample-accurate is enabled, functions without profile will
+/// When ProfileAccurateForSymsInList is enabled and profile symbol list
+/// is present, functions in the profile symbol list but without profile will
/// be regarded as cold and much less inlining will happen in CGSCC inlining
/// pass, so we tend to lower the hot criteria here to allow more early
/// inlining to happen for warm callsites and it is helpful for performance.
@@ -487,7 +498,7 @@ bool SampleProfileLoader::callsiteIsHot(
assert(PSI && "PSI is expected to be non null");
uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
- if (ProfSampleAccEnabled)
+ if (ProfAccForSymsInList)
return !PSI->isColdCount(CallsiteTotalSamples);
else
return PSI->isHotCount(CallsiteTotalSamples);
@@ -890,6 +901,14 @@ bool SampleProfileLoader::inlineHotFunct
Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
DenseSet<Instruction *> PromotedInsns;
+ // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
+ // Profile symbol list is ignored when profile-sample-accurate is on.
+ assert((!ProfAccForSymsInList ||
+ (!ProfileSampleAccurate &&
+ !F.hasFnAttribute("profile-sample-accurate"))) &&
+ "ProfAccForSymsInList should be false when profile-sample-accurate "
+ "is enabled");
+
DenseMap<Instruction *, const FunctionSamples *> localNotInlinedCallSites;
bool Changed = false;
while (true) {
@@ -1667,7 +1686,10 @@ bool SampleProfileLoader::doInitializati
ProfileIsValid = (Reader->read() == sampleprof_error::success);
PSL = Reader->getProfileSymbolList();
- if (ProfileSampleAccurate) {
+ // While profile-sample-accurate is on, ignore symbol list.
+ ProfAccForSymsInList =
+ ProfileAccurateForSymsInList && PSL && !ProfileSampleAccurate;
+ if (ProfAccForSymsInList) {
NamesInProfile.clear();
if (auto NameTable = Reader->getNameTable())
NamesInProfile.insert(NameTable->begin(), NameTable->end());
@@ -1765,37 +1787,38 @@ bool SampleProfileLoader::runOnFunction(
// this will be overwritten in emitAnnotations.
uint64_t initialEntryCount = -1;
- ProfSampleAccEnabled =
- ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate");
- if (ProfSampleAccEnabled) {
- // PSL -- profile symbol list include all the symbols in sampled binary.
- // It is used to prevent new functions to be treated as cold.
- if (PSL) {
- // Profile symbol list is available, initialize the entry count to 0
- // only for functions in the list.
- if (PSL->contains(F.getName()))
- initialEntryCount = 0;
-
- // When ProfileSampleAccurate is true, function without sample will be
- // regarded as cold. To minimize the potential negative performance
- // impact it could have, we want to be a little conservative here
- // saying if a function shows up in the profile, no matter as outline
- // function, inline instance or call targets, treat the function as not
- // being cold. This will handle the cases such as most callsites of a
- // function are inlined in sampled binary but not inlined in current
- // build (because of source code drift, imprecise debug information, or
- // the callsites are all cold individually but not cold
- // accumulatively...), so the outline function showing up as cold in
- // sampled binary will actually not be cold after current build.
- StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
- if (NamesInProfile.count(CanonName))
- initialEntryCount = -1;
- } else {
- // If there is no profile symbol list available, initialize all the
- // function entry counts to 0. It means all the functions without
- // profile will be regarded as cold.
+ ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
+ if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
+ // initialize all the function entry counts to 0. It means all the
+ // functions without profile will be regarded as cold.
+ initialEntryCount = 0;
+ // profile-sample-accurate is a user assertion which has a higher precedence
+ // than symbol list. When profile-sample-accurate is on, ignore symbol list.
+ ProfAccForSymsInList = false;
+ }
+
+ // PSL -- profile symbol list include all the symbols in sampled binary.
+ // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
+ // old functions without samples being cold, without having to worry
+ // about new and hot functions being mistakenly treated as cold.
+ if (ProfAccForSymsInList) {
+ // Initialize the entry count to 0 for functions in the list.
+ if (PSL->contains(F.getName()))
initialEntryCount = 0;
- }
+
+ // Function in the symbol list but without sample will be regarded as
+ // cold. To minimize the potential negative performance impact it could
+ // have, we want to be a little conservative here saying if a function
+ // shows up in the profile, no matter as outline function, inline instance
+ // or call targets, treat the function as not being cold. This will handle
+ // the cases such as most callsites of a function are inlined in sampled
+ // binary but not inlined in current build (because of source code drift,
+ // imprecise debug information, or the callsites are all cold individually
+ // but not cold accumulatively...), so the outline function showing up as
+ // cold in sampled binary will actually not be cold after current build.
+ StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
+ if (NamesInProfile.count(CanonName))
+ initialEntryCount = -1;
}
F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
Modified: llvm/trunk/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll?rev=373133&r1=373132&r2=373133&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll (original)
+++ llvm/trunk/test/Transforms/SampleProfile/compressed-profile-symbol-list.ll Fri Sep 27 15:33:59 2019
@@ -1,5 +1,5 @@
; REQUIRES: zlib
; Append inline.prof with profile symbol list and save it after compression.
; RUN: llvm-profdata merge --sample --prof-sym-list=%S/Inputs/profile-symbol-list.text --compress-prof-sym-list=true --extbinary %S/Inputs/inline.prof --output=%t.profdata
-; RUN: opt < %S/Inputs/profile-symbol-list.ll -sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll
-; RUN: opt < %S/Inputs/profile-symbol-list.ll -passes=sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll
+; RUN: opt < %S/Inputs/profile-symbol-list.ll -sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll
+; RUN: opt < %S/Inputs/profile-symbol-list.ll -passes=sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll
Modified: llvm/trunk/test/Transforms/SampleProfile/profile-sample-accurate.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/profile-sample-accurate.ll?rev=373133&r1=373132&r2=373133&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/profile-sample-accurate.ll (original)
+++ llvm/trunk/test/Transforms/SampleProfile/profile-sample-accurate.ll Fri Sep 27 15:33:59 2019
@@ -1,8 +1,18 @@
-; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT
+
; RUN: llvm-profdata merge -sample -extbinary -prof-sym-list=%S/Inputs/profile-symbol-list.text %S/Inputs/profsampleacc.extbinary.afdo -o %t.symlist.afdo
-; RUN: opt < %s -sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=PROFSYMLIST
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -S | FileCheck %s --check-prefix=PROFSYMLIST
+; RUN: opt < %s -sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=PROFSYMLIST
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.symlist.afdo -profile-summary-cutoff-hot=600000 -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=PROFSYMLIST
+;
+; If -profile-accurate-for-symsinlist and -profile-sample-accurate both present,
+; -profile-sample-accurate will override -profile-accurate-for-symsinlist.
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=600000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_WARM
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profsampleacc.extbinary.afdo -profile-summary-cutoff-hot=900000 -profile-sample-accurate -profile-accurate-for-symsinlist -S | FileCheck %s --check-prefix=CALL_SUM_IS_HOT
;
; Original C++ test case
;
@@ -22,13 +32,21 @@
;
@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
-; Check _Z3sumii's function entry count will be intialized to 0 if no profile
-; symbol list is available.
-; If symbol list is available, _Z3sumii's function entry count will be
-; initialized to -1 if it shows up in the profile.
+; Check _Z3sumii's function entry count will be 0 when
+; profile-sample-accurate is enabled.
+; CALL_SUM_IS_HOT: define i32 @_Z3sumii{{.*}}!prof ![[ZERO_ID:[0-9]+]]
;
-; CHECK: define i32 @_Z3sumii{{.*}}!prof ![[ZERO_ID:[0-9]+]]
+; Check _Z3sumii's function entry count will be nonzero when
+; profile-sample-accurate is enabled because the callsite is warm and not
+; inlined so its function entry count is adjusted to nonzero.
+; CALL_SUM_IS_WARM: define i32 @_Z3sumii{{.*}}!prof ![[NONZERO_ID:[0-9]+]]
+;
+; Check _Z3sumii's function entry count will be initialized to -1 when
+; profile-accurate-for-profsymlist is enabled and _Z3sumii exists in the
+; profile symbol list because it also shows up in the profile as inline
+; instance.
; PROFSYMLIST: define i32 @_Z3sumii{{.*}}!prof ![[UNKNOWN_ID:[0-9]+]]
+;
; Function Attrs: nounwind uwtable
define i32 @_Z3sumii(i32 %x, i32 %y) !dbg !4 {
entry:
@@ -65,11 +83,21 @@ while.body:
br i1 %cmp1, label %if.then, label %if.else, !dbg !16
; With the hot cutoff being set to 600000, the inline instance of _Z3sumii
-; in main is neither hot nor cold. Check it will still be inlined when
+; in main is neither hot nor cold. Check it won't be inlined when
; profile-sample-accurate is enabled.
-; CHECK: if.then:
-; CHECK-NOT: call i32 @_Z3sumii
-; CHECK: if.else:
+; CALL_SUM_IS_WARM: if.then:
+; CALL_SUM_IS_WARM: call i32 @_Z3sumii
+; CALL_SUM_IS_WARM: if.else:
+;
+; With the hot cutoff being set to 900000, the inline instance of _Z3sumii
+; in main is hot. Check the callsite of _Z3sumii will be inlined when
+; profile-sample-accurate is enabled.
+; CALL_SUM_IS_HOT: if.then:
+; CALL_SUM_IS_HOT-NOT: call i32 @_Z3sumii
+; CALL_SUM_IS_HOT: if.else:
+;
+; Check _Z3sumii will be inlined when profile-accurate-for-profsymlist is
+; enabled
; PROFSYMLIST: if.then:
; PROFSYMLIST-NOT: call i32 @_Z3sumii
; PROFSYMLIST: if.else:
@@ -99,7 +127,8 @@ declare i32 @printf(i8*, ...) #2
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
+; CALL_SUM_IS_HOT: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
+; CALL_SUM_IS_WARM: ![[NONZERO_ID]] = !{!"function_entry_count", i64 5179}
; PROFSYMLIST: ![[UNKNOWN_ID]] = !{!"function_entry_count", i64 -1}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "calls.cc", directory: ".")
Modified: llvm/trunk/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll?rev=373133&r1=373132&r2=373133&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll (original)
+++ llvm/trunk/test/Transforms/SampleProfile/uncompressed-profile-symbol-list.ll Fri Sep 27 15:33:59 2019
@@ -1,4 +1,4 @@
; Append inline.prof with profile symbol list and save it without compression.
; RUN: llvm-profdata merge --sample --prof-sym-list=%S/Inputs/profile-symbol-list.text --compress-prof-sym-list=false --extbinary %S/Inputs/inline.prof --output=%t.profdata
-; RUN: opt < %S/Inputs/profile-symbol-list.ll -sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll
-; RUN: opt < %S/Inputs/profile-symbol-list.ll -passes=sample-profile -profile-sample-accurate -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll
+; RUN: opt < %S/Inputs/profile-symbol-list.ll -sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll
+; RUN: opt < %S/Inputs/profile-symbol-list.ll -passes=sample-profile -profile-accurate-for-symsinlist -sample-profile-file=%t.profdata -S | FileCheck %S/Inputs/profile-symbol-list.ll
More information about the llvm-commits
mailing list