[llvm] 339b8a0 - [AutoFDO] Use flattened profiles for profile staleness metrics
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 30 11:05:31 PDT 2023
Author: wlei
Date: 2023-03-30T11:05:10-07:00
New Revision: 339b8a0019658746217870215abca77291ad75b3
URL: https://github.com/llvm/llvm-project/commit/339b8a0019658746217870215abca77291ad75b3
DIFF: https://github.com/llvm/llvm-project/commit/339b8a0019658746217870215abca77291ad75b3.diff
LOG: [AutoFDO] Use flattened profiles for profile staleness metrics
Previously, the profile staleness report only counted the top-level function samples in a nested profile; the samples in the inlinees were ignored. This could affect the quality of the metrics when there are heavily inlined functions. This change adds a feature to flatten the nested profile, and switches to using the flattened profile as the input for stale profile detection and matching.
Example for profile flattening:
```
Original profile:
_Z3bazi:20301:1000
 1: 1000
 3: 2000
 5: inline1:1600
  1: 600
  3: inline2:500
   1: 500
Flattened profile:
_Z3bazi:18701:1000
 1: 1000
 3: 2000
 5: 600 inline1:600
inline1:1100:600
 1: 600
 3: 500 inline2: 500
inline2:500:500
 1: 500
```
This feature could also be useful for offline analysis, such as understanding the hotness of each individual function, so this change adds support for it to `llvm-profdata merge` under `--convert-sample-profile-layout=flat`.
Reviewed By: hoy, wenlei
Differential Revision: https://reviews.llvm.org/D146452
Added:
llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof
llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll
llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext
llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext
llvm/test/tools/llvm-profdata/sample-flatten-profile.test
Modified:
llvm/docs/CommandGuide/llvm-profdata.rst
llvm/include/llvm/ProfileData/SampleProf.h
llvm/lib/ProfileData/SampleProf.cpp
llvm/lib/Transforms/IPO/SampleProfile.cpp
llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
llvm/test/Transforms/SampleProfile/csspgo-inline.ll
llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll
llvm/test/Transforms/SampleProfile/profile-mismatch.ll
llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
llvm/tools/llvm-profdata/llvm-profdata.cpp
llvm/tools/llvm-profgen/ProfileGenerator.cpp
Removed:
################################################################################
diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst
index 89a624555fe44..1ed8531fe2217 100644
--- a/llvm/docs/CommandGuide/llvm-profdata.rst
+++ b/llvm/docs/CommandGuide/llvm-profdata.rst
@@ -161,6 +161,12 @@ OPTIONS
coverage for the optimized target. This option can only be used with
sample-based profile in extbinary format.
+.. option:: --convert-sample-profile-layout=[nest|flat]
+
+ Convert the merged profile into a profile with a new layout. Supported
+ layout are ``nest``(Nested profile, the input should be CS flat profile) and
+ ``flat``(Profile with nested inlinees flattened out).
+
.. option:: --supplement-instr-with-sample=<file>
Supplement an instrumentation profile with sample profile. The sample profile
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index faee9639ea860..8e76af2316cea 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -96,6 +96,12 @@ enum SampleProfileFormat {
SPF_Binary = 0xff
};
+enum SampleProfileLayout {
+ SPL_None = 0,
+ SPL_Nest = 0x1,
+ SPL_Flat = 0x2,
+};
+
static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) {
return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) |
uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) |
@@ -747,6 +753,8 @@ class FunctionSamples {
void setTotalSamples(uint64_t Num) { TotalSamples = Num; }
+ void setHeadSamples(uint64_t Num) { TotalHeadSamples = Num; }
+
sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
bool Overflowed;
TotalHeadSamples =
@@ -934,6 +942,8 @@ class FunctionSamples {
return CallsiteSamples;
}
+ CallsiteSampleMap &getCallsiteSamples() { return CallsiteSamples; }
+
/// Return the maximum of sample counts in a function body. When SkipCallSite
/// is false, which is the default, the return count includes samples in the
/// inlined functions. When SkipCallSite is true, the return count only
@@ -1274,12 +1284,16 @@ class SampleContextTrimmer {
SampleProfileMap &ProfileMap;
};
-// CSProfileConverter converts a full context-sensitive flat sample profile into
-// a nested context-sensitive sample profile.
-class CSProfileConverter {
+/// Helper class for profile conversion.
+///
+/// It supports full context-sensitive profile to nested profile conversion,
+/// nested profile to flatten profile conversion, etc.
+class ProfileConverter {
public:
- CSProfileConverter(SampleProfileMap &Profiles);
- void convertProfiles();
+ ProfileConverter(SampleProfileMap &Profiles);
+ // Convert a full context-sensitive flat sample profile into a nested sample
+ // profile.
+ void convertCSProfiles();
struct FrameNode {
FrameNode(StringRef FName = StringRef(),
FunctionSamples *FSamples = nullptr,
@@ -1299,9 +1313,85 @@ class CSProfileConverter {
StringRef CalleeName);
};
+ static void flattenProfile(SampleProfileMap &ProfileMap,
+ bool ProfileIsCS = false) {
+ SampleProfileMap TmpProfiles;
+ flattenProfile(ProfileMap, TmpProfiles, ProfileIsCS);
+ ProfileMap = std::move(TmpProfiles);
+ }
+
+ static void flattenProfile(const SampleProfileMap &InputProfiles,
+ SampleProfileMap &OutputProfiles,
+ bool ProfileIsCS = false) {
+ if (ProfileIsCS) {
+ for (const auto &I : InputProfiles)
+ OutputProfiles[I.second.getName()].merge(I.second);
+ // Retain the profile name and clear the full context for each function
+ // profile.
+ for (auto &I : OutputProfiles)
+ I.second.setContext(SampleContext(I.first));
+ } else {
+ for (const auto &I : InputProfiles)
+ flattenNestedProfile(OutputProfiles, I.second);
+ }
+ }
+
private:
+ static void flattenNestedProfile(SampleProfileMap &OutputProfiles,
+ const FunctionSamples &FS) {
+ // To retain the context, checksum, attributes of the original profile, make
+ // a copy of it if no profile is found.
+ SampleContext &Context = FS.getContext();
+ auto Ret = OutputProfiles.emplace(Context, FS);
+ FunctionSamples &Profile = Ret.first->second;
+ if (Ret.second) {
+ // When it's the copy of the old profile, just clear all the inlinees'
+ // samples.
+ Profile.getCallsiteSamples().clear();
+ // We recompute TotalSamples later, so here set to zero.
+ Profile.setTotalSamples(0);
+ } else {
+ for (const auto &Line : FS.getBodySamples()) {
+ Profile.addBodySamples(Line.first.LineOffset, Line.first.Discriminator,
+ Line.second.getSamples());
+ }
+ }
+
+ assert(Profile.getCallsiteSamples().empty() &&
+ "There should be no inlinees' profiles after flattening.");
+
+ // TotalSamples might not be equal to the sum of all samples from
+ // BodySamples and CallsiteSamples. So here we use "TotalSamples =
+ // Original_TotalSamples - All_of_Callsite_TotalSamples +
+ // All_of_Callsite_HeadSamples" to compute the new TotalSamples.
+ uint64_t TotalSamples = FS.getTotalSamples();
+
+ for (const auto &I : FS.getCallsiteSamples()) {
+ for (const auto &Callee : I.second) {
+ const auto &CalleeProfile = Callee.second;
+ // Add body sample.
+ Profile.addBodySamples(I.first.LineOffset, I.first.Discriminator,
+ CalleeProfile.getHeadSamplesEstimate());
+ // Add callsite sample.
+ Profile.addCalledTargetSamples(
+ I.first.LineOffset, I.first.Discriminator, CalleeProfile.getName(),
+ CalleeProfile.getHeadSamplesEstimate());
+ // Update total samples.
+ TotalSamples = TotalSamples >= CalleeProfile.getTotalSamples()
+ ? TotalSamples - CalleeProfile.getTotalSamples()
+ : 0;
+ TotalSamples += CalleeProfile.getHeadSamplesEstimate();
+ // Recursively convert callee profile.
+ flattenNestedProfile(OutputProfiles, CalleeProfile);
+ }
+ }
+ Profile.addTotalSamples(TotalSamples);
+
+ Profile.setHeadSamples(Profile.getHeadSamplesEstimate());
+ }
+
// Nest all children profiles into the profile of Node.
- void convertProfiles(FrameNode &Node);
+ void convertCSProfiles(FrameNode &Node);
FrameNode *getOrCreateContextPath(const SampleContext &Context);
SampleProfileMap &ProfileMap;
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 198209c91f3a9..3c60ef436db91 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -461,9 +461,9 @@ void ProfileSymbolList::dump(raw_ostream &OS) const {
OS << Sym << "\n";
}
-CSProfileConverter::FrameNode *
-CSProfileConverter::FrameNode::getOrCreateChildFrame(
- const LineLocation &CallSite, StringRef CalleeName) {
+ProfileConverter::FrameNode *
+ProfileConverter::FrameNode::getOrCreateChildFrame(const LineLocation &CallSite,
+ StringRef CalleeName) {
uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite);
auto It = AllChildFrames.find(Hash);
if (It != AllChildFrames.end()) {
@@ -476,7 +476,7 @@ CSProfileConverter::FrameNode::getOrCreateChildFrame(
return &AllChildFrames[Hash];
}
-CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles)
+ProfileConverter::ProfileConverter(SampleProfileMap &Profiles)
: ProfileMap(Profiles) {
for (auto &FuncSample : Profiles) {
FunctionSamples *FSamples = &FuncSample.second;
@@ -486,8 +486,8 @@ CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles)
}
}
-CSProfileConverter::FrameNode *
-CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) {
+ProfileConverter::FrameNode *
+ProfileConverter::getOrCreateContextPath(const SampleContext &Context) {
auto Node = &RootFrame;
LineLocation CallSiteLoc(0, 0);
for (auto &Callsite : Context.getContextFrames()) {
@@ -497,14 +497,14 @@ CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) {
return Node;
}
-void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) {
+void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
// Process each child profile. Add each child profile to callsite profile map
// of the current node `Node` if `Node` comes with a profile. Otherwise
// promote the child profile to a standalone profile.
auto *NodeProfile = Node.FuncSamples;
for (auto &It : Node.AllChildFrames) {
auto &ChildNode = It.second;
- convertProfiles(ChildNode);
+ convertCSProfiles(ChildNode);
auto *ChildProfile = ChildNode.FuncSamples;
if (!ChildProfile)
continue;
@@ -544,4 +544,4 @@ void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) {
}
}
-void CSProfileConverter::convertProfiles() { convertProfiles(RootFrame); }
+void ProfileConverter::convertCSProfiles() { convertCSProfiles(RootFrame); }
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index e745ff3a853d7..79e7c653f3441 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -139,6 +139,11 @@ static cl::opt<bool> PersistProfileStaleness(
cl::desc("Compute stale profile statistical metrics and write it into the "
"native object file(.llvm_stats section)."));
+static cl::opt<bool> FlattenProfileForMatching(
+ "flatten-profile-for-matching", cl::Hidden, cl::init(true),
+ cl::desc(
+ "Use flattened profile for stale profile detection and matching."));
+
static cl::opt<bool> ProfileSampleAccurate(
"profile-sample-accurate", cl::Hidden, cl::init(false),
cl::desc("If the sample profile is accurate, we will mark all un-sampled "
@@ -434,6 +439,7 @@ class SampleProfileMatcher {
Module &M;
SampleProfileReader &Reader;
const PseudoProbeManager *ProbeManager;
+ SampleProfileMap FlattenedProfiles;
// Profile mismatching statstics.
uint64_t TotalProfiledCallsites = 0;
@@ -448,7 +454,21 @@ class SampleProfileMatcher {
public:
SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
const PseudoProbeManager *ProbeManager)
- : M(M), Reader(Reader), ProbeManager(ProbeManager) {}
+ : M(M), Reader(Reader), ProbeManager(ProbeManager) {
+ if (FlattenProfileForMatching) {
+ ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
+ FunctionSamples::ProfileIsCS);
+ }
+ }
+
+ FunctionSamples *getFlattenedSamplesFor(const Function &F) {
+ StringRef CanonFName = FunctionSamples::getCanonicalFnName(F);
+ auto It = FlattenedProfiles.find(CanonFName);
+ if (It != FlattenedProfiles.end())
+ return &It->second;
+ return nullptr;
+ }
+
void detectProfileMismatch();
void detectProfileMismatch(const Function &F, const FunctionSamples &FS);
};
@@ -2156,7 +2176,11 @@ void SampleProfileMatcher::detectProfileMismatch() {
for (auto &F : M) {
if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
continue;
- FunctionSamples *FS = Reader.getSamplesFor(F);
+ FunctionSamples *FS = nullptr;
+ if (FlattenProfileForMatching)
+ FS = getFlattenedSamplesFor(F);
+ else
+ FS = Reader.getSamplesFor(F);
if (!FS)
continue;
detectProfileMismatch(F, *FS);
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof
new file mode 100644
index 0000000000000..0a04602b2a012
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch-cs.prof
@@ -0,0 +1,18 @@
+[main]:30:0
+ 0: 0
+ 1.1: 0
+ 3: 10 matched:10
+ 4: 10
+ 5: 10 bar_mismatch:10
+ 7: 5 foo:5
+ 8: 0
+[main:7 @ foo]:15:5
+ 1: 5
+ 2: 5
+ 3: 5 inlinee_mismatch:5
+[bar]:10:10
+ 1: 10
+[matched]:10:10
+ 1: 10
+[main:7 @ foo:3 @ inlinee_mismatch]:5:5
+ 1: 5
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
index 0bb17b2f8f6e4..818a048b8cabb 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
@@ -5,9 +5,11 @@ main:30:0
4: 10
5: 10 bar_mismatch:10
8: 0
- 7: foo:10
+ 7: foo:15
1: 5
2: 5
+ 3: inlinee_mismatch:5
+ 1: 5
bar:10:10
1: 10
matched:10:10
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll
index c88de5f56c743..177329f954979 100644
--- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll
@@ -10,7 +10,7 @@
; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
-; RUN: llvm-profdata merge --sample --text --gen-cs-nested-profile %S/Inputs/profile-context-tracker.prof -o %t.prof
+; RUN: llvm-profdata merge --sample --text --convert-sample-profile-layout=nest %S/Inputs/profile-context-tracker.prof -o %t.prof
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll
index da9c37937d2ae..030b5aa188165 100644
--- a/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll
+++ b/llvm/test/Transforms/SampleProfile/csspgo-use-preinliner.ll
@@ -3,7 +3,7 @@
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/csspgo-use-preinliner.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE
-; RUN: llvm-profdata merge --sample --text --gen-cs-nested-profile -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof
+; RUN: llvm-profdata merge --sample --text --convert-sample-profile-layout=nest -generate-merged-base-profiles=0 %S/Inputs/csspgo-use-preinliner.prof -o %t.prof
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=0 -S 2>&1 | FileCheck %s --check-prefix=DEFAULT
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -pass-remarks=inline -sample-profile-prioritized-inline -profile-sample-accurate -sample-profile-use-preinliner=1 -S 2>&1 | FileCheck %s --check-prefix=PREINLINE
diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll
new file mode 100644
index 0000000000000..ef11652fd1a87
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/profile-mismatch-flattened-profile.ll
@@ -0,0 +1,13 @@
+; REQUIRES: x86_64-linux
+; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll
+; RUN: FileCheck %s --input-file %t
+; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
+
+; RUN: opt < %S/profile-mismatch.ll -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch-cs.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=1 -S 2>%t -o %t.ll
+; RUN: FileCheck %s --input-file %t
+; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
+
+
+; CHECK: (3/4) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch.
+
+; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 3, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30}
diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
index cf07974da27fe..8340c3b0e62d5 100644
--- a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
@@ -1,5 +1,5 @@
; REQUIRES: x86_64-linux
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -S 2>%t -o %t.ll
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness -persist-profile-staleness -flatten-profile-for-matching=0 -S 2>%t -o %t.ll
; RUN: FileCheck %s --input-file %t
; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
; RUN: llc < %t.ll -filetype=obj -o %t.obj
diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext
new file mode 100644
index 0000000000000..5cd880b63baad
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile-cs.proftext
@@ -0,0 +1,20 @@
+[baz]:150:10
+ 1: 10
+ 3: 20
+ 5: 20 foo:20
+[foo]:102:1
+ 1: 1
+ 3: 1
+[main]:91:1
+ 4: 1
+ 4.2: 1
+ 7: 1
+ 9: 3 bar:2 foo:1
+ 10: 3 baz:2 foo:1
+[main:10 @ foo]:2:1
+ 3: 1 bar:1
+ 4: 1
+[bar]:1:1
+ 1: 1
+[main:10 @ foo:3 @ bar]:1:1
+ 1: 1
diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext
new file mode 100644
index 0000000000000..46564f65121e6
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/sample-flatten-profile.proftext
@@ -0,0 +1,44 @@
+baz:160:10
+ 1: 10
+ 3: 20
+ 5: foo:30
+ 1: 20
+ 3: bar:10
+ 1: 10
+ !CFGChecksum: 4
+ !Attributes: 4
+ !CFGChecksum: 3
+ !Attributes: 3
+ !CFGChecksum: 1
+ !Attributes: 1
+main:110:1
+ 4: 1
+ 4.2: 1
+ 7: 1
+ 9: 3 bar:2 foo:1
+ 10: foo:2
+ 4: 1
+ 3: bar:1
+ 1: 1
+ !CFGChecksum: 4
+ !Attributes: 4
+ !CFGChecksum: 3
+ !Attributes: 3
+ 10: baz:20
+ 10: 1
+ 6: bar:3
+ 1: 2
+ 7: 1
+ !CFGChecksum: 4
+ !Attributes: 4
+ !CFGChecksum: 2
+ !Attributes: 2
+foo:102:1
+ 1: 1
+ 3: 1
+ !CFGChecksum: 3
+ !Attributes: 3
+bar:1:1
+ 1: 1
+ !CFGChecksum: 4
+ !Attributes: 4
diff --git a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
index d458d7fad1be6..7b01324219115 100644
--- a/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
+++ b/llvm/test/tools/llvm-profdata/cs-sample-nested-profile.test
@@ -1,14 +1,14 @@
-RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0
+RUN: llvm-profdata merge --sample --text -output=%t.proftext %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0
RUN: FileCheck %s < %t.proftext --match-full-lines --strict-whitespace
-RUN: llvm-profdata merge --sample --text -output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0
+RUN: llvm-profdata merge --sample --text -output=%t.probe.proftext %S/Inputs/cs-sample-preinline-probe.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0
RUN: FileCheck %s < %t.probe.proftext --match-full-lines --strict-whitespace -check-prefix=PROBE
-RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=0
+RUN: llvm-profdata merge --sample --extbinary -output=%t.profbin %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=0
RUN: llvm-profdata merge --sample --text -output=%t2.proftext %t.profbin
RUN: FileCheck %s < %t2.proftext --match-full-lines --strict-whitespace
RUN: llvm-profdata show --sample -show-sec-info-only %t.profbin | FileCheck %s -check-prefix=PREINLINE
-RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1
+RUN: llvm-profdata merge --sample --text -output=%t3.proftext %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=1
RUN: FileCheck %s < %t3.proftext --match-full-lines --strict-whitespace -check-prefix=RECOUNT
-RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --gen-cs-nested-profile=1 -generate-merged-base-profiles=1
+RUN: llvm-profdata merge --sample --extbinary -output=%t2.profbin %S/Inputs/cs-sample-preinline.proftext --convert-sample-profile-layout=nest -generate-merged-base-profiles=1
RUN: llvm-profdata show -sample -detailed-summary %S/Inputs/cs-sample-preinline.proftext | FileCheck %s -check-prefix=SUMMARY
RUN: llvm-profdata show -sample -detailed-summary %t2.profbin | FileCheck %s -check-prefix=SUMMARY-NEST
RUN: llvm-profdata show -sample -detailed-summary %t3.proftext | FileCheck %s -check-prefix=SUMMARY-NEST
diff --git a/llvm/test/tools/llvm-profdata/sample-flatten-profile.test b/llvm/test/tools/llvm-profdata/sample-flatten-profile.test
new file mode 100644
index 0000000000000..90effcb25190e
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/sample-flatten-profile.test
@@ -0,0 +1,50 @@
+; RUN: llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %S/Inputs/sample-flatten-profile.proftext -o - | FileCheck %s --match-full-lines --strict-whitespace
+; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/sample-flatten-profile.proftext -o %t2 && llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %t2 -o - | FileCheck %s --match-full-lines --strict-whitespace
+
+; RUN: llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %S/Inputs/sample-flatten-profile-cs.proftext -o - | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=CHECK-CS
+; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/sample-flatten-profile-cs.proftext -o %t2 && llvm-profdata merge --sample --convert-sample-profile-layout=flat --text %t2 -o - | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=CHECK-CS
+
+; CHECK:baz:169:10
+; CHECK-NEXT: 1: 10
+; CHECK-NEXT: 3: 20
+; CHECK-NEXT: 5: 20 foo:20
+; CHECK-NEXT: 6: 2 bar:2
+; CHECK-NEXT: 10: 1
+; CHECK-NEXT: !CFGChecksum: 1
+; CHECK-NEXT: !Attributes: 1
+; CHECK-NEXT:foo:134:21
+; CHECK-NEXT: 1: 21
+; CHECK-NEXT: 3: 12 bar:11
+; CHECK-NEXT: 4: 1
+; CHECK-NEXT: !CFGChecksum: 3
+; CHECK-NEXT: !Attributes: 3
+; CHECK-NEXT:main:91:1
+; CHECK-NEXT: 4: 1
+; CHECK-NEXT: 4.2: 1
+; CHECK-NEXT: 7: 1
+; CHECK-NEXT: 9: 3 bar:2 foo:1
+; CHECK-NEXT: 10: 3 baz:2 foo:1
+; CHECK-NEXT: !CFGChecksum: 2
+; CHECK-NEXT: !Attributes: 2
+; CHECK-NEXT:bar:15:14
+; CHECK-NEXT: 1: 14
+; CHECK-NEXT: 7: 1
+; CHECK-NEXT: !CFGChecksum: 4
+; CHECK-NEXT: !Attributes: 4
+
+; CHECK-CS:baz:150:10
+; CHECK-CS-NEXT: 1: 10
+; CHECK-CS-NEXT: 3: 20
+; CHECK-CS-NEXT: 5: 20 foo:20
+; CHECK-CS-NEXT:foo:104:2
+; CHECK-CS-NEXT: 1: 1
+; CHECK-CS-NEXT: 3: 2 bar:1
+; CHECK-CS-NEXT: 4: 1
+; CHECK-CS-NEXT:main:91:1
+; CHECK-CS-NEXT: 4: 1
+; CHECK-CS-NEXT: 4.2: 1
+; CHECK-CS-NEXT: 7: 1
+; CHECK-CS-NEXT: 9: 3 bar:2 foo:1
+; CHECK-CS-NEXT: 10: 3 baz:2 foo:1
+; CHECK-CS-NEXT:bar:2:2
+; CHECK-CS-NEXT: 1: 2
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 28c4db6b5c4a2..64cf3cf044078 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -968,7 +968,8 @@ static void
mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
StringRef OutputFilename, ProfileFormat OutputFormat,
StringRef ProfileSymbolListFile, bool CompressAllSections,
- bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile,
+ bool UseMD5, bool GenPartialProfile,
+ SampleProfileLayout ProfileLayout,
bool SampleMergeColdContext, bool SampleTrimColdContext,
bool SampleColdContextFrameDepth, FailureMode FailMode,
bool DropProfileSymbolList, size_t OutputSizeLimit) {
@@ -1048,9 +1049,12 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
SampleMergeColdContext, SampleColdContextFrameDepth, false);
}
- if (ProfileIsCS && GenCSNestedProfile) {
- CSProfileConverter CSConverter(ProfileMap);
- CSConverter.convertProfiles();
+ if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
+ ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS);
+ ProfileIsCS = FunctionSamples::ProfileIsCS = false;
+ } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
+ ProfileConverter CSConverter(ProfileMap);
+ CSConverter.convertCSProfiles();
ProfileIsCS = FunctionSamples::ProfileIsCS = false;
}
@@ -1241,9 +1245,15 @@ static int merge_main(int argc, const char *argv[]) {
"instr-prof-cold-threshold", cl::init(0), cl::Hidden,
cl::desc("User specified cold threshold for instr profile which will "
"override the cold threshold got from profile summary. "));
- cl::opt<bool> GenCSNestedProfile(
- "gen-cs-nested-profile", cl::Hidden, cl::init(false),
- cl::desc("Generate nested function profiles for CSSPGO"));
+ cl::opt<SampleProfileLayout> ProfileLayout(
+ "convert-sample-profile-layout",
+ cl::desc("Convert the generated profile to a profile with a new layout"),
+ cl::init(SPL_None),
+ cl::values(
+ clEnumValN(SPL_Nest, "nest",
+ "Nested profile, the input should be CS flat profile"),
+ clEnumValN(SPL_Flat, "flat",
+ "Profile with nested inlinee flatten out")));
cl::opt<std::string> DebugInfoFilename(
"debug-info", cl::init(""),
cl::desc("Use the provided debug info to correlate the raw profile."));
@@ -1298,12 +1308,12 @@ static int merge_main(int argc, const char *argv[]) {
OutputFilename, OutputFormat, OutputSparse, NumThreads,
FailureMode, ProfiledBinary);
else
- mergeSampleProfile(
- WeightedInputs, Remapper.get(), OutputFilename, OutputFormat,
- ProfileSymbolListFile, CompressAllSections, UseMD5, GenPartialProfile,
- GenCSNestedProfile, SampleMergeColdContext, SampleTrimColdContext,
- SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList,
- OutputSizeLimit);
+ mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
+ OutputFormat, ProfileSymbolListFile, CompressAllSections,
+ UseMD5, GenPartialProfile, ProfileLayout,
+ SampleMergeColdContext, SampleTrimColdContext,
+ SampleColdContextFrameDepth, FailureMode,
+ DropProfileSymbolList, OutputSizeLimit);
return 0;
}
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index dfc42a5f4e021..2728f80da64bc 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -1026,8 +1026,8 @@ void CSProfileGenerator::postProcessProfiles() {
calculateAndShowDensity(ContextLessProfiles);
if (GenCSNestedProfile) {
- CSProfileConverter CSConverter(ProfileMap);
- CSConverter.convertProfiles();
+ ProfileConverter CSConverter(ProfileMap);
+ CSConverter.convertCSProfiles();
FunctionSamples::ProfileIsCS = false;
}
}
More information about the llvm-commits
mailing list