[llvm] 863184d - [CSSPGO] Aggregation by the last K context frames for cold profiles
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 14 10:34:13 PDT 2021
Author: wlei
Date: 2021-06-14T10:33:43-07:00
New Revision: 863184dd6920a65dd122603be7e595bc88d89e8b
URL: https://github.com/llvm/llvm-project/commit/863184dd6920a65dd122603be7e595bc88d89e8b
DIFF: https://github.com/llvm/llvm-project/commit/863184dd6920a65dd122603be7e595bc88d89e8b.diff
LOG: [CSSPGO] Aggregation by the last K context frames for cold profiles
This change adds an option to merge and aggregate cold contexts by their last K frames instead of by the context-less name. The default, K = 1, is equivalent to merging by the context-less name.
This is for better perf tuning. The more selective merging and trimming will rely on llvm-profgen's preinliner.
Reviewed By: wenlei, hoy
Differential Revision: https://reviews.llvm.org/D104131
Added:
Modified:
llvm/include/llvm/ProfileData/SampleProf.h
llvm/lib/ProfileData/SampleProf.cpp
llvm/test/tools/llvm-profgen/merge-cold-profile.test
llvm/tools/llvm-profdata/llvm-profdata.cpp
llvm/tools/llvm-profgen/ProfileGenerator.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 31d3d85da242..4861cdb98d1d 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -430,6 +430,22 @@ class SampleContext {
return ContextStr.split(" @ ");
}
+  // Reconstruct a new context made of the last K frames of FullContext.
+  // Frames are separated by the exact three-character string " @ ".
+  //  - K <= 1 returns the context-less name (K == 0 has no meaningful
+  //    "last zero frames" result, so it is treated like K == 1 instead of
+  //    yielding an empty string).
+  //  - If the context has K frames or fewer, the whole FullContext is
+  //    returned.
+  StringRef getContextWithLastKFrames(uint32_t K) {
+    if (K <= 1)
+      return getNameWithoutContext();
+
+    // Walk backwards over K separators. Match the full " @ " substring
+    // rather than any single ' ' or '@' character, so a frame name that
+    // happens to contain one of those characters is not split.
+    StringRef Prefix = FullContext;
+    while (K--) {
+      size_t Sep = Prefix.rfind(" @ ");
+      if (Sep == StringRef::npos)
+        return FullContext;
+      Prefix = Prefix.substr(0, Sep);
+    }
+    // Prefix now ends right before the K-th separator from the end; skip
+    // that separator (3 characters) to land on the first kept frame.
+    return FullContext.substr(Prefix.size() + 3);
+  }
+
// Decode context string for a frame to get function name and location.
// `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
static void decodeContextString(StringRef ContextStr, StringRef &FName,
@@ -993,8 +1009,9 @@ class SampleContextTrimmer {
: ProfileMap(Profiles){};
// Trim and merge cold context profile when requested.
void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold,
- bool TrimColdContext = true,
- bool MergeColdContext = true);
+ bool TrimColdContext,
+ bool MergeColdContext,
+ uint32_t ColdContextFrameLength);
// Canonicalize context profile name and attributes.
void canonicalizeContextProfiles();
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 52aeafc492c3..b6abf6e0c313 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -324,7 +324,8 @@ std::error_code ProfileSymbolList::read(const uint8_t *Data,
}
void SampleContextTrimmer::trimAndMergeColdContextProfiles(
- uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext) {
+ uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext,
+ uint32_t ColdContextFrameLength) {
if (!TrimColdContext && !MergeColdContext)
return;
@@ -342,21 +343,24 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
ColdProfiles.emplace_back(I.getKey(), &I.second);
}
- // Remove the cold profile from ProfileMap and merge them into BaseProileMap
- StringMap<FunctionSamples> BaseProfileMap;
+ // Remove the cold profile from ProfileMap and merge them into
+ // MergedProfileMap by the last K frames of context
+ StringMap<FunctionSamples> MergedProfileMap;
for (const auto &I : ColdProfiles) {
if (MergeColdContext) {
- auto Ret = BaseProfileMap.try_emplace(
- I.second->getContext().getNameWithoutContext(), FunctionSamples());
- FunctionSamples &BaseProfile = Ret.first->second;
- BaseProfile.merge(*I.second);
+ auto Ret = MergedProfileMap.try_emplace(
+ I.second->getContext().getContextWithLastKFrames(
+ ColdContextFrameLength),
+ FunctionSamples());
+ FunctionSamples &MergedProfile = Ret.first->second;
+ MergedProfile.merge(*I.second);
}
ProfileMap.erase(I.first);
}
- // Merge the base profiles into ProfileMap;
- for (const auto &I : BaseProfileMap) {
- // Filter the cold base profile
+ // Move the merged profiles into ProfileMap;
+ for (const auto &I : MergedProfileMap) {
+ // Filter the cold merged profile
if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold &&
ProfileMap.find(I.getKey()) == ProfileMap.end())
continue;
diff --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
index 906fee7687ec..3d749db1ce32 100644
--- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test
+++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
@@ -10,6 +10,10 @@
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t3 --compress-recursion=-1 --profile-summary-cold-count=10 --csprof-merge-cold-context=0
; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED
+; Test --csprof-frame-depth-for-cold-context
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-cold-count=100 --csprof-trim-cold-context=0 --csprof-frame-depth-for-cold-context=2
+; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-COLD-CONTEXT-LENGTH
+
; CHECK: [fa]:14:4
; CHECK-NEXT: 1: 4
; CHECK-NEXT: 2: 18446744073709551615
@@ -56,6 +60,38 @@
; CHECK-UNMERGED-NOT: [fa]
; CHECK-UNMERGED-NOT: [fb]
+; CHECK-COLD-CONTEXT-LENGTH: [fb:5 @ fb]:13:4
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 4
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 2: 3
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 5: 4 fb:4
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 1 fa:1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 72617220756
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
+; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fb:6 @ fa]:10:3
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 3
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 2: 18446744073709551615
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 3
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 4: 1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 5: 1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 7: 1 fb:1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 8: 1 fa:1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 120515930909
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
+; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fa:7 @ fb]:6:2
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 2
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 2
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 2 fa:2
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 72617220756
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
+; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fa:8 @ fa]:4:1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 2: 18446744073709551615
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 4: 1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 7: 1 fb:1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 120515930909
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index cd9629f07707..c15d467762a2 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -689,7 +689,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
StringRef ProfileSymbolListFile, bool CompressAllSections,
bool UseMD5, bool GenPartialProfile,
bool SampleMergeColdContext, bool SampleTrimColdContext,
- FailureMode FailMode) {
+ bool SampleColdContextFrameDepth, FailureMode FailMode) {
using namespace sampleprof;
StringMap<FunctionSamples> ProfileMap;
SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
@@ -758,9 +758,9 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
// Trim and merge cold context profile using cold threshold above;
SampleContextTrimmer(ProfileMap)
- .trimAndMergeColdContextProfiles(SampleProfColdThreshold,
- SampleTrimColdContext,
- SampleMergeColdContext);
+ .trimAndMergeColdContextProfiles(
+ SampleProfColdThreshold, SampleTrimColdContext,
+ SampleMergeColdContext, SampleColdContextFrameDepth);
}
auto WriterOrErr =
@@ -914,6 +914,10 @@ static int merge_main(int argc, const char *argv[]) {
"sample-trim-cold-context", cl::init(false), cl::Hidden,
cl::desc(
"Trim context sample profiles whose count is below cold threshold"));
+ cl::opt<uint32_t> SampleColdContextFrameDepth(
+ "sample-frame-depth-for-cold-context", cl::init(1), cl::ZeroOrMore,
+ cl::desc("Keep the last K frames while merging cold profile. 1 means the "
+ "context-less base profile"));
cl::opt<bool> GenPartialProfile(
"gen-partial-profile", cl::init(false), cl::Hidden,
cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
@@ -985,7 +989,8 @@ static int merge_main(int argc, const char *argv[]) {
mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
OutputFormat, ProfileSymbolListFile, CompressAllSections,
UseMD5, GenPartialProfile, SampleMergeColdContext,
- SampleTrimColdContext, FailureMode);
+ SampleTrimColdContext, SampleColdContextFrameDepth,
+ FailureMode);
return 0;
}
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 613623178853..c2d1753d78f1 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -43,6 +43,11 @@ static cl::opt<bool> CSProfTrimColdContext(
cl::desc("If the total count of the profile after all merge is done "
"is still smaller than threshold, it will be trimmed."));
+static cl::opt<uint32_t> CSProfColdContextFrameDepth(
+ "csprof-frame-depth-for-cold-context", cl::init(1), cl::ZeroOrMore,
+ cl::desc("Keep the last K frames while merging cold profile. 1 means the "
+ "context-less base profile"));
+
extern cl::opt<int> ProfileSummaryCutoffCold;
using namespace llvm;
@@ -401,7 +406,8 @@ void CSProfileGenerator::postProcessProfiles() {
// Trim and merge cold context profile using cold threshold above;
SampleContextTrimmer(ProfileMap)
.trimAndMergeColdContextProfiles(
- ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
+ ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
+ CSProfColdContextFrameDepth);
}
void CSProfileGenerator::computeSummaryAndThreshold() {
More information about the llvm-commits
mailing list