[llvm] e10b73f - [CSSPGO][llvm-profgen] Merge and trim profile for cold context to reduce profile size

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 4 11:05:53 PST 2021


Author: wlei
Date: 2021-02-04T11:05:03-08:00
New Revision: e10b73f6463fca53155ee7cc88ffa87ee96fb1b9

URL: https://github.com/llvm/llvm-project/commit/e10b73f6463fca53155ee7cc88ffa87ee96fb1b9
DIFF: https://github.com/llvm/llvm-project/commit/e10b73f6463fca53155ee7cc88ffa87ee96fb1b9.diff

LOG: [CSSPGO][llvm-profgen] Merge and trim profile for cold context to reduce profile size

This change allows merging and trimming cold context profiles in llvm-profgen to solve the profile size bloat problem. Currently, when a profile's total sample count is below a threshold (configurable via a switch), it is considered cold and merged into a base context-less profile, which keeps the profile quality at least as good as the baseline (non-CS).

For example, two input profiles:
 [main @ foo @ bar]:60
 [main @ bar]:50
With threshold = 100, the two profiles will be merged into one with the base context, giving the result:
 [bar]:110

Added two switches:
`--csprof-cold-thres=<value>`: Specifies the total samples threshold for a context profile to be considered cold, with 100 being the default. Any cold context profiles will be merged into the context-less base profiles by default.
`--csprof-keep-cold`: Forces profile generation to keep cold context profiles instead of dropping them. By default, any cold context profile will not be written to the output profile.

Results:
Though we have not yet evaluated it with the latest CSSPGO, our internal branch shows a neutral performance impact and a significant reduction in profile size. A detailed evaluation of llvm-profgen with CSSPGO will come later.

Differential Revision: https://reviews.llvm.org/D94111

Added: 
    llvm/test/tools/llvm-profgen/merge-cold-profile.test

Modified: 
    llvm/test/tools/llvm-profgen/inline-cs-noprobe.test
    llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test
    llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
    llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
    llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
    llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
    llvm/tools/llvm-profgen/ProfileGenerator.cpp
    llvm/tools/llvm-profgen/ProfileGenerator.h

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test
index 98767a9b29b7..943832ebef10 100644
--- a/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/inline-cs-noprobe.test
@@ -1,4 +1,4 @@
-; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
+; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-noprobe.perfscript --binary=%S/Inputs/inline-cs-noprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
 
 ; CHECK:[main:1 @ foo]:44:0

diff  --git a/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test
index 19928322a66d..c7aa1dea21bb 100644
--- a/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test
+++ b/llvm/test/tools/llvm-profgen/inline-cs-pseudoprobe.test
@@ -1,4 +1,4 @@
-; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
+; RUN: llvm-profgen --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%S/Inputs/inline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
 
 ; CHECK:     [main:2 @ foo]:74:0

diff  --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
new file mode 100644
index 000000000000..e0c65ac44e2b
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
@@ -0,0 +1,70 @@
+; Used the data from recursion-compression.test, refer it for the unmerged output
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=8
+; RUN: FileCheck %s --input-file %t
+
+; Test --csprof-keep-cold
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=100 --csprof-keep-cold
+; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-KEEP-COLD
+
+; CHECK:     [fa]:14:4
+; CHECK-NEXT: 1: 4
+; CHECK-NEXT: 3: 4
+; CHECK-NEXT: 4: 2
+; CHECK-NEXT: 5: 1
+; CHECK-NEXT: 7: 2 fb:2
+; CHECK-NEXT: 8: 1 fa:1
+; CHECK-NEXT: !CFGChecksum: 120515930909
+; CHECK-NEXT:[main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4
+; CHECK-NEXT: 1: 4
+; CHECK-NEXT: 2: 3
+; CHECK-NEXT: 3: 1
+; CHECK-NEXT: 5: 4 fb:4
+; CHECK-NEXT: 6: 1 fa:1
+; CHECK-NEXT: !CFGChecksum: 72617220756
+
+; CHECK-KEEP-COLD:     [fb]:19:6
+; CHECK-KEEP-COLD-NEXT: 1: 6
+; CHECK-KEEP-COLD-NEXT: 2: 3
+; CHECK-KEEP-COLD-NEXT: 3: 3
+; CHECK-KEEP-COLD-NEXT: 5: 4 fb:4
+; CHECK-KEEP-COLD-NEXT: 6: 3 fa:3
+; CHECK-KEEP-COLD-NEXT: !CFGChecksum: 72617220756
+; CHECK-KEEP-COLD-NEXT:[fa]:14:4
+; CHECK-KEEP-COLD-NEXT: 1: 4
+; CHECK-KEEP-COLD-NEXT: 3: 4
+; CHECK-KEEP-COLD-NEXT: 4: 2
+; CHECK-KEEP-COLD-NEXT: 5: 1
+; CHECK-KEEP-COLD-NEXT: 7: 2 fb:2
+; CHECK-KEEP-COLD-NEXT: 8: 1 fa:1
+; CHECK-KEEP-COLD-NEXT: !CFGChecksum: 120515930909
+
+
+; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
+; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
+; -g test.c  -o a.out
+
+; Copied from recursion-compression.test
+#include <stdio.h>
+
+int fb(int n) {
+  if(n > 10) return fb(n / 2);
+  return fa(n - 1);
+}
+
+int fa(int n) {
+  if(n < 2) return n;
+  if(n % 2) return fb(n - 1);
+  return fa(n - 1);
+}
+
+void foo() {
+  int s, i = 0;
+  while (i++ < 10000)
+    s += fa(i);
+  printf("sum is %d\n", s);
+}
+
+int main() {
+  foo();
+  return 0;
+}

diff  --git a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
index 9beecb271fc0..2e60883afa62 100644
--- a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
@@ -1,4 +1,4 @@
-; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
+; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
 
 ; CHECK:[main:1 @ foo:3 @ bar]:12:3

diff  --git a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
index 0491a62ff69b..a0e5507c70dd 100644
--- a/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
+++ b/llvm/test/tools/llvm-profgen/noinline-cs-pseudoprobe.test
@@ -1,4 +1,4 @@
-; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
+; RUN: llvm-profgen --perfscript=%S/Inputs/noinline-cs-pseudoprobe.perfscript --binary=%S/Inputs/noinline-cs-pseudoprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
 
 ; CHECK:     [main:2 @ foo]:75:0

diff  --git a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
index 47e0a51a4261..43f495398bb0 100644
--- a/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/recursion-compression-noprobe.test
@@ -1,7 +1,7 @@
 ; Firstly test uncompression(--compress-recursion=0)
-; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --compress-recursion=0
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --compress-recursion=0 --csprof-cold-thres=0
 ; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS
-; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-noprobe.perfscript --binary=%S/Inputs/recursion-compression-noprobe.perfbin --output=%t --csprof-cold-thres=0
 ; RUN: FileCheck %s --input-file %t
 
 ; CHECK-UNCOMPRESS:[main:1 @ foo:3 @ fa]:14:0

diff  --git a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
index 86afe6c632bd..0d4e7dbb1dd4 100644
--- a/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
+++ b/llvm/test/tools/llvm-profgen/recursion-compression-pseudoprobe.test
@@ -1,7 +1,7 @@
 ; Firstly test uncompression(--compress-recursion=0)
-; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=0 --csprof-cold-thres=0
 ; RUN: FileCheck %s --input-file %t -check-prefix=CHECK-UNCOMPRESS
-; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output | FileCheck %s --check-prefix=CHECK-UNWINDER
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --show-unwinder-output --csprof-cold-thres=0 | FileCheck %s --check-prefix=CHECK-UNWINDER
 ; RUN: FileCheck %s --input-file %t
 
 ; CHECK-UNCOMPRESS: [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:5 @ fb:6 @ fa:8 @ fa:7 @ fb:6 @ fa]:4:1

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index f769bd592f87..b2a8d60d5caf 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -29,6 +29,19 @@ static cl::opt<int32_t, true> RecursionCompression(
     cl::Hidden,
     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
 
+static cl::opt<uint64_t> CSProfColdThres(
+    "csprof-cold-thres", cl::init(100), cl::ZeroOrMore,
+    cl::desc("Specify the total samples threshold for a context profile to "
+             "be considered cold, any cold profiles will be merged into "
+             "context-less base profiles"));
+
+static cl::opt<bool> CSProfKeepCold(
+    "csprof-keep-cold", cl::init(false), cl::ZeroOrMore,
+    cl::desc("This works together with --csprof-cold-thres. If the total count "
+             "of the profile after all merge is done is still smaller than the "
+             "csprof-cold-thres, it will be trimmed unless csprof-keep-cold "
+             "flag is specified."));
+
 using namespace llvm;
 using namespace sampleprof;
 
@@ -68,6 +81,7 @@ void ProfileGenerator::write() {
   if (std::error_code EC = WriterOrErr.getError())
     exitWithError(EC, OutputFilename);
   auto Writer = std::move(WriterOrErr.get());
+  mergeAndTrimColdProfile(ProfileMap);
   Writer->write(ProfileMap);
 }
 
@@ -329,6 +343,49 @@ void CSProfileGenerator::populateInferredFunctionSamples() {
   }
 }
 
+void CSProfileGenerator::mergeAndTrimColdProfile(
+    StringMap<FunctionSamples> &ProfileMap) {
+  // Nothing to merge if sample threshold is zero
+  if (!CSProfColdThres)
+    return;
+
+  // Filter the cold profiles from ProfileMap and move them into a tmp
+  // container
+  std::vector<std::pair<StringRef, const FunctionSamples *>> ToRemoveVec;
+  for (const auto &I : ProfileMap) {
+    const FunctionSamples &FunctionProfile = I.second;
+    if (FunctionProfile.getTotalSamples() >= CSProfColdThres)
+      continue;
+    ToRemoveVec.emplace_back(I.getKey(), &I.second);
+  }
+
+  // Remove the code profile from ProfileMap and merge them into BaseProileMap
+  StringMap<FunctionSamples> BaseProfileMap;
+  for (const auto &I : ToRemoveVec) {
+    auto Ret =
+        BaseProfileMap.try_emplace(I.second->getName(), FunctionSamples());
+    FunctionSamples &BaseProfile = Ret.first->second;
+    BaseProfile.merge(*I.second);
+    ProfileMap.erase(I.first);
+  }
+
+  // Merge the base profiles into ProfileMap;
+  for (const auto &I : BaseProfileMap) {
+    // Filter the cold base profile
+    if (!CSProfKeepCold && I.second.getTotalSamples() < CSProfColdThres &&
+        ProfileMap.find(I.getKey()) == ProfileMap.end())
+      continue;
+    // Merge the profile if the original profile exists, otherwise just insert
+    // as a new profile
+    FunctionSamples &OrigProfile = getFunctionProfileForContext(I.getKey());
+    StringRef TmpName = OrigProfile.getName();
+    OrigProfile.merge(I.second);
+    // Should use the name ref from ProfileMap's key to avoid name being freed
+    // from BaseProfileMap
+    OrigProfile.setName(TmpName);
+  }
+}
+
 // Helper function to extract context prefix string stack
 // Extract context stack for reusing, leaf context stack will
 // be added compressed while looking up function profile

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index 14e58fc9c895..9cb04c4de34d 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -28,7 +28,10 @@ class ProfileGenerator {
   create(const BinarySampleCounterMap &BinarySampleCounters,
          enum PerfScriptType SampleType);
   virtual void generateProfile() = 0;
-
+  // Merge and trim profile with cold context before serialization,
+  // only eligible for CS profile
+  virtual void
+  mergeAndTrimColdProfile(StringMap<FunctionSamples> &ProfileMap){};
   // Use SampleProfileWriter to serialize profile map
   void write();
 
@@ -200,6 +203,9 @@ class CSProfileGenerator : public ProfileGenerator {
 protected:
   // Lookup or create FunctionSamples for the context
   FunctionSamples &getFunctionProfileForContext(StringRef ContextId);
+  // Merge cold context profile whose total sample is below threshold
+  // into base profile.
+  void mergeAndTrimColdProfile(StringMap<FunctionSamples> &ProfileMap) override;
 
 private:
   // Helper function for updating body sample for a leaf location in


        


More information about the llvm-commits mailing list