[llvm] 863184d - [CSSPGO] Aggregation by the last K context frames for cold profiles

via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 14 10:34:13 PDT 2021


Author: wlei
Date: 2021-06-14T10:33:43-07:00
New Revision: 863184dd6920a65dd122603be7e595bc88d89e8b

URL: https://github.com/llvm/llvm-project/commit/863184dd6920a65dd122603be7e595bc88d89e8b
DIFF: https://github.com/llvm/llvm-project/commit/863184dd6920a65dd122603be7e595bc88d89e8b.diff

LOG: [CSSPGO] Aggregation by the last K context frames for cold profiles

This change adds an option to merge and aggregate cold contexts by their last K frames instead of by the context-less name. The default, K = 1, keeps the context-less behavior.

This is for better perf tuning. The more selective merging and trimming will rely on llvm-profgen's preinliner.

Reviewed By: wenlei, hoy

Differential Revision: https://reviews.llvm.org/D104131

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/SampleProf.h
    llvm/lib/ProfileData/SampleProf.cpp
    llvm/test/tools/llvm-profgen/merge-cold-profile.test
    llvm/tools/llvm-profdata/llvm-profdata.cpp
    llvm/tools/llvm-profgen/ProfileGenerator.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 31d3d85da242..4861cdb98d1d 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -430,6 +430,22 @@ class SampleContext {
     return ContextStr.split(" @ ");
   }
 
+  // Return the context formed by the last K frames. K <= 1 yields the
+  // context-less name; a context with at most K frames is returned whole.
+  StringRef getContextWithLastKFrames(uint32_t K) {
+    if (K <= 1) // K == 0 would otherwise slice out an empty context.
+      return getNameWithoutContext();
+
+    StringRef Head = FullContext; // Frames not yet counted, outermost first.
+    while (K--) {
+      size_t Sep = Head.rfind(" @ "); // Whole separator, not a char set.
+      if (Sep == StringRef::npos)
+        return FullContext;
+      Head = Head.substr(0, Sep);
+    }
+    return FullContext.substr(Head.size() + 3); // Skip the " @ " itself.
+  }
+
   // Decode context string for a frame to get function name and location.
   // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
   static void decodeContextString(StringRef ContextStr, StringRef &FName,
@@ -993,8 +1009,9 @@ class SampleContextTrimmer {
       : ProfileMap(Profiles){};
   // Trim and merge cold context profile when requested.
   void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold,
-                                       bool TrimColdContext = true,
-                                       bool MergeColdContext = true);
+                                       bool TrimColdContext,
+                                       bool MergeColdContext,
+                                       uint32_t ColdContextFrameLength);
   // Canonicalize context profile name and attributes.
   void canonicalizeContextProfiles();
 

diff  --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 52aeafc492c3..b6abf6e0c313 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -324,7 +324,8 @@ std::error_code ProfileSymbolList::read(const uint8_t *Data,
 }
 
 void SampleContextTrimmer::trimAndMergeColdContextProfiles(
-    uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext) {
+    uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext,
+    uint32_t ColdContextFrameLength) {
   if (!TrimColdContext && !MergeColdContext)
     return;
 
@@ -342,21 +343,24 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
     ColdProfiles.emplace_back(I.getKey(), &I.second);
   }
 
-  // Remove the cold profile from ProfileMap and merge them into BaseProileMap
-  StringMap<FunctionSamples> BaseProfileMap;
+  // Remove the cold profile from ProfileMap and merge them into
+  // MergedProfileMap by the last K frames of context
+  StringMap<FunctionSamples> MergedProfileMap;
   for (const auto &I : ColdProfiles) {
     if (MergeColdContext) {
-      auto Ret = BaseProfileMap.try_emplace(
-          I.second->getContext().getNameWithoutContext(), FunctionSamples());
-      FunctionSamples &BaseProfile = Ret.first->second;
-      BaseProfile.merge(*I.second);
+      auto Ret = MergedProfileMap.try_emplace(
+          I.second->getContext().getContextWithLastKFrames(
+              ColdContextFrameLength),
+          FunctionSamples());
+      FunctionSamples &MergedProfile = Ret.first->second;
+      MergedProfile.merge(*I.second);
     }
     ProfileMap.erase(I.first);
   }
 
-  // Merge the base profiles into ProfileMap;
-  for (const auto &I : BaseProfileMap) {
-    // Filter the cold base profile
+  // Move the merged profiles into ProfileMap;
+  for (const auto &I : MergedProfileMap) {
+    // Filter the cold merged profile
     if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold &&
         ProfileMap.find(I.getKey()) == ProfileMap.end())
       continue;

diff  --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
index 906fee7687ec..3d749db1ce32 100644
--- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test
+++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
@@ -10,6 +10,10 @@
 ; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t3 --compress-recursion=-1 --profile-summary-cold-count=10 --csprof-merge-cold-context=0
 ; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK-UNMERGED
 
+; Test --csprof-frame-depth-for-cold-context
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t2 --compress-recursion=-1 --profile-summary-cold-count=100 --csprof-trim-cold-context=0 --csprof-frame-depth-for-cold-context=2
+; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK-COLD-CONTEXT-LENGTH
+
 ; CHECK:     [fa]:14:4
 ; CHECK-NEXT: 1: 4
 ; CHECK-NEXT: 2: 18446744073709551615
@@ -56,6 +60,38 @@
 ; CHECK-UNMERGED-NOT: [fa]
 ; CHECK-UNMERGED-NOT: [fb]
 
+; CHECK-COLD-CONTEXT-LENGTH:     [fb:5 @ fb]:13:4
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 4
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 2: 3
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 5: 4 fb:4
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 1 fa:1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 72617220756
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
+; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fb:6 @ fa]:10:3
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 3
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 2: 18446744073709551615
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 3
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 4: 1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 5: 1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 7: 1 fb:1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 8: 1 fa:1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 120515930909
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
+; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fa:7 @ fb]:6:2
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 2
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 2
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 6: 2 fa:2
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 72617220756
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
+; CHECK-COLD-CONTEXT-LENGTH-NEXT:[fa:8 @ fa]:4:1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 1: 1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 2: 18446744073709551615
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 3: 1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 4: 1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: 7: 1 fb:1
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !CFGChecksum: 120515930909
+; CHECK-COLD-CONTEXT-LENGTH-NEXT: !Attributes: 0
 
 ; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
 ; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls

diff  --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index cd9629f07707..c15d467762a2 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -689,7 +689,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
                    StringRef ProfileSymbolListFile, bool CompressAllSections,
                    bool UseMD5, bool GenPartialProfile,
                    bool SampleMergeColdContext, bool SampleTrimColdContext,
-                   FailureMode FailMode) {
+                   uint32_t SampleColdContextFrameDepth, FailureMode FailMode) {
   using namespace sampleprof;
   StringMap<FunctionSamples> ProfileMap;
   SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
@@ -758,9 +758,9 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
 
     // Trim and merge cold context profile using cold threshold above;
     SampleContextTrimmer(ProfileMap)
-        .trimAndMergeColdContextProfiles(SampleProfColdThreshold,
-                                         SampleTrimColdContext,
-                                         SampleMergeColdContext);
+        .trimAndMergeColdContextProfiles(
+            SampleProfColdThreshold, SampleTrimColdContext,
+            SampleMergeColdContext, SampleColdContextFrameDepth);
   }
 
   auto WriterOrErr =
@@ -914,6 +914,10 @@ static int merge_main(int argc, const char *argv[]) {
       "sample-trim-cold-context", cl::init(false), cl::Hidden,
       cl::desc(
           "Trim context sample profiles whose count is below cold threshold"));
+  cl::opt<uint32_t> SampleColdContextFrameDepth(
+      "sample-frame-depth-for-cold-context", cl::init(1), cl::ZeroOrMore,
+      cl::desc("Keep the last K frames while merging cold profile. 1 means the "
+               "context-less base profile"));
   cl::opt<bool> GenPartialProfile(
       "gen-partial-profile", cl::init(false), cl::Hidden,
       cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
@@ -985,7 +989,8 @@ static int merge_main(int argc, const char *argv[]) {
     mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
                        OutputFormat, ProfileSymbolListFile, CompressAllSections,
                        UseMD5, GenPartialProfile, SampleMergeColdContext,
-                       SampleTrimColdContext, FailureMode);
+                       SampleTrimColdContext, SampleColdContextFrameDepth,
+                       FailureMode);
 
   return 0;
 }

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 613623178853..c2d1753d78f1 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -43,6 +43,11 @@ static cl::opt<bool> CSProfTrimColdContext(
     cl::desc("If the total count of the profile after all merge is done "
              "is still smaller than threshold, it will be trimmed."));
 
+static cl::opt<uint32_t> CSProfColdContextFrameDepth(
+    "csprof-frame-depth-for-cold-context", cl::init(1), cl::ZeroOrMore,
+    cl::desc("Keep the last K frames while merging cold profile. 1 means the "
+             "context-less base profile")); // K, forwarded to the cold-context trimmer.
+
 extern cl::opt<int> ProfileSummaryCutoffCold;
 
 using namespace llvm;
@@ -401,7 +406,8 @@ void CSProfileGenerator::postProcessProfiles() {
   // Trim and merge cold context profile using cold threshold above;
   SampleContextTrimmer(ProfileMap)
       .trimAndMergeColdContextProfiles(
-          ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext);
+          ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
+          CSProfColdContextFrameDepth);
 }
 
 void CSProfileGenerator::computeSummaryAndThreshold() {


        


More information about the llvm-commits mailing list