[llvm] ce6bfe9 - [CSSPGO][llvm-profgen] Use profile summary based threshold for context trimming and merging

Wenlei He via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 22 09:07:40 PDT 2021


Author: Wenlei He
Date: 2021-03-22T08:56:59-07:00
New Revision: ce6bfe94115a3fa16f5f7f7898a62c07f26333fc

URL: https://github.com/llvm/llvm-project/commit/ce6bfe94115a3fa16f5f7f7898a62c07f26333fc
DIFF: https://github.com/llvm/llvm-project/commit/ce6bfe94115a3fa16f5f7f7898a62c07f26333fc.diff

LOG: [CSSPGO][llvm-profgen] Use profile summary based threshold for context trimming and merging

Switch to using the cold threshold from the profile summary for cold context merging and trimming, instead of relying on hard-coded values. Minor refactoring is also included (e.g. renaming command-line switches).

Differential Revision: https://reviews.llvm.org/D98921

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/ProfileSummaryInfo.h
    llvm/lib/Analysis/ProfileSummaryInfo.cpp
    llvm/test/tools/llvm-profgen/merge-cold-profile.test
    llvm/tools/llvm-profgen/ProfileGenerator.cpp
    llvm/tools/llvm-profgen/ProfileGenerator.h

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
index c7967df12418..e3e6268004f8 100644
--- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -38,7 +38,7 @@ class Function;
 // units. This would require making this depend on BFI.
 class ProfileSummaryInfo {
 private:
-  const Module &M;
+  const Module *M;
   std::unique_ptr<ProfileSummary> Summary;
   void computeThresholds();
   // Count thresholds to answer isHotCount and isColdCount queries.
@@ -58,7 +58,9 @@ class ProfileSummaryInfo {
   mutable DenseMap<int, uint64_t> ThresholdCache;
 
 public:
-  ProfileSummaryInfo(const Module &M) : M(M) { refresh(); }
+  ProfileSummaryInfo(const Module &M) : M(&M) { refresh(); }
+  ProfileSummaryInfo(std::unique_ptr<ProfileSummary> PSI)
+      : M(nullptr), Summary(std::move(PSI)) {}
 
   ProfileSummaryInfo(ProfileSummaryInfo &&Arg) = default;
 

diff  --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index c9671d4f5c2e..67712c3d818e 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -95,13 +95,13 @@ void ProfileSummaryInfo::refresh() {
   if (hasProfileSummary())
     return;
   // First try to get context sensitive ProfileSummary.
-  auto *SummaryMD = M.getProfileSummary(/* IsCS */ true);
+  auto *SummaryMD = M->getProfileSummary(/* IsCS */ true);
   if (SummaryMD)
     Summary.reset(ProfileSummary::getFromMD(SummaryMD));
 
   if (!hasProfileSummary()) {
     // This will actually return PSK_Instr or PSK_Sample summary.
-    SummaryMD = M.getProfileSummary(/* IsCS */ false);
+    SummaryMD = M->getProfileSummary(/* IsCS */ false);
     if (SummaryMD)
       Summary.reset(ProfileSummary::getFromMD(SummaryMD));
   }

diff  --git a/llvm/test/tools/llvm-profgen/merge-cold-profile.test b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
index 43dc73e739ad..0549befd34e9 100644
--- a/llvm/test/tools/llvm-profgen/merge-cold-profile.test
+++ b/llvm/test/tools/llvm-profgen/merge-cold-profile.test
@@ -2,10 +2,14 @@
 ; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=8
 ; RUN: FileCheck %s --input-file %t
 
-; Test --csprof-keep-cold
-; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=100 --csprof-keep-cold
+; Test --csprof-trim-cold-context=0
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=100 --csprof-trim-cold-context=0
 ; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-KEEP-COLD
 
+; Test --csprof-merge-cold-context=0
+; RUN: llvm-profgen --perfscript=%S/Inputs/recursion-compression-pseudoprobe.perfscript --binary=%S/Inputs/recursion-compression-pseudoprobe.perfbin --output=%t --compress-recursion=-1 --csprof-cold-thres=10 --csprof-merge-cold-context=0
+; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-UNMERGED
+
 ; CHECK:     [fa]:14:4
 ; CHECK-NEXT: 1: 4
 ; CHECK-NEXT: 3: 4
@@ -40,6 +44,16 @@
 ; CHECK-KEEP-COLD-NEXT: 8: 1 fa:1
 ; CHECK-KEEP-COLD-NEXT: !CFGChecksum: 120515930909
 
+; CHECK-UNMERGED:     [main:2 @ foo:5 @ fa:8 @ fa:7 @ fb:5 @ fb]:13:4
+; CHECK-UNMERGED-NEXT: 1: 4
+; CHECK-UNMERGED-NEXT: 2: 3
+; CHECK-UNMERGED-NEXT: 3: 1
+; CHECK-UNMERGED-NEXT: 5: 4 fb:4
+; CHECK-UNMERGED-NEXT: 6: 1 fa:1
+; CHECK-UNMERGED-NEXT: !CFGChecksum: 72617220756
+; CHECK-UNMERGED-NOT: [fa]
+; CHECK-UNMERGED-NOT: [fb]
+
 
 ; clang -O3 -fexperimental-new-pass-manager -fuse-ld=lld -fpseudo-probe-for-profiling
 ; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 81b0c912884f..b3fb015b6725 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "ProfileGenerator.h"
+#include "llvm/ProfileData/ProfileCommon.h"
 
 static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
                                            cl::Required,
@@ -31,18 +32,23 @@ static cl::opt<int32_t, true> RecursionCompression(
     cl::Hidden,
     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
 
-static cl::opt<uint64_t> CSProfColdThres(
+static cl::opt<uint64_t> CSProfColdThreshold(
     "csprof-cold-thres", cl::init(100), cl::ZeroOrMore,
     cl::desc("Specify the total samples threshold for a context profile to "
              "be considered cold, any cold profiles will be merged into "
              "context-less base profiles"));
 
-static cl::opt<bool> CSProfKeepCold(
-    "csprof-keep-cold", cl::init(false), cl::ZeroOrMore,
+static cl::opt<bool> CSProfMergeColdContext(
+    "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
     cl::desc("This works together with --csprof-cold-thres. If the total count "
-             "of the profile after all merge is done is still smaller than the "
-             "csprof-cold-thres, it will be trimmed unless csprof-keep-cold "
-             "flag is specified."));
+             "of context profile is smaller than the threshold, it will be "
+             "merged into context-less base profile."));
+
+static cl::opt<bool> CSProfTrimColdContext(
+    "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore,
+    cl::desc("This works together with --csprof-cold-thres. If the total count "
+             "of the profile after all merge is done is still smaller than "
+             "threshold, it will be trimmed."));
 
 using namespace llvm;
 using namespace sampleprof;
@@ -197,6 +203,7 @@ CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr,
       FContext.setAttribute(ContextWasInlined);
     FunctionSamples &FProfile = Ret.first->second;
     FProfile.setContext(FContext);
+    FProfile.setName(FContext.getNameWithoutContext());
   }
   return Ret.first->second;
 }
@@ -226,6 +233,10 @@ void CSProfileGenerator::generateProfile() {
   // functions, we estimate it from inlinee's profile using the entry of the
   // body sample.
   populateInferredFunctionSamples();
+
+  // Compute hot/cold threshold based on profile. This will be used for cold
+  // context profile merging/trimming.
+  computeSummaryAndThreshold();
 }
 
 void CSProfileGenerator::updateBodySamplesforFunctionProfile(
@@ -381,36 +392,54 @@ void CSProfileGenerator::populateInferredFunctionSamples() {
   }
 }
 
+void CSProfileGenerator::computeSummaryAndThreshold() {
+  SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
+  auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
+  PSI.reset(new ProfileSummaryInfo(std::move(Summary)));
+}
+
 void CSProfileGenerator::mergeAndTrimColdProfile(
     StringMap<FunctionSamples> &ProfileMap) {
+  if (!CSProfMergeColdContext && !CSProfTrimColdContext)
+    return;
+
+  // Use threshold calculated from profile summary unless specified
+  uint64_t ColdThreshold = PSI->getColdCountThreshold();
+  if (CSProfColdThreshold.getNumOccurrences()) {
+    ColdThreshold = CSProfColdThreshold;
+  }
+
   // Nothing to merge if sample threshold is zero
-  if (!CSProfColdThres)
+  if (ColdThreshold == 0)
     return;
 
   // Filter the cold profiles from ProfileMap and move them into a tmp
   // container
-  std::vector<std::pair<StringRef, const FunctionSamples *>> ToRemoveVec;
+  std::vector<std::pair<StringRef, const FunctionSamples *>> ColdProfiles;
   for (const auto &I : ProfileMap) {
     const FunctionSamples &FunctionProfile = I.second;
-    if (FunctionProfile.getTotalSamples() >= CSProfColdThres)
+    if (FunctionProfile.getTotalSamples() >= ColdThreshold)
       continue;
-    ToRemoveVec.emplace_back(I.getKey(), &I.second);
+    ColdProfiles.emplace_back(I.getKey(), &I.second);
   }
 
   // Remove the cold profiles from ProfileMap and merge them into BaseProfileMap
   StringMap<FunctionSamples> BaseProfileMap;
-  for (const auto &I : ToRemoveVec) {
-    auto Ret = BaseProfileMap.try_emplace(
-        I.second->getContext().getNameWithoutContext(), FunctionSamples());
-    FunctionSamples &BaseProfile = Ret.first->second;
-    BaseProfile.merge(*I.second);
+  for (const auto &I : ColdProfiles) {
+    if (CSProfMergeColdContext) {
+      auto Ret = BaseProfileMap.try_emplace(
+          I.second->getContext().getNameWithoutContext(), FunctionSamples());
+      FunctionSamples &BaseProfile = Ret.first->second;
+      BaseProfile.merge(*I.second);
+    }
     ProfileMap.erase(I.first);
   }
 
   // Merge the base profiles into ProfileMap;
   for (const auto &I : BaseProfileMap) {
     // Filter the cold base profile
-    if (!CSProfKeepCold && I.second.getTotalSamples() < CSProfColdThres &&
+    if (CSProfTrimColdContext &&
+        I.second.getTotalSamples() < CSProfColdThreshold &&
         ProfileMap.find(I.getKey()) == ProfileMap.end())
       continue;
     // Merge the profile if the original profile exists, otherwise just insert
@@ -470,6 +499,10 @@ void PseudoProbeCSProfileGenerator::generateProfile() {
                                         ContextStrStack, Binary);
     }
   }
+
+  // Compute hot/cold threshold based on profile. This will be used for cold
+  // context profile merging/trimming.
+  computeSummaryAndThreshold();
 }
 
 void PseudoProbeCSProfileGenerator::extractProbesFromRange(

diff  --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index 2205f781e682..ff0116fb5c35 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -11,7 +11,9 @@
 #include "ErrorHandling.h"
 #include "PerfReader.h"
 #include "ProfiledBinary.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/ProfileData/SampleProfWriter.h"
+#include <memory>
 
 using namespace llvm;
 using namespace sampleprof;
@@ -179,6 +181,7 @@ class CSProfileGenerator : public ProfileGenerator {
   // Merge cold context profile whose total sample is below threshold
   // into base profile.
   void mergeAndTrimColdProfile(StringMap<FunctionSamples> &ProfileMap);
+  void computeSummaryAndThreshold();
   void write(std::unique_ptr<SampleProfileWriter> Writer,
              StringMap<FunctionSamples> &ProfileMap) override;
 
@@ -197,6 +200,9 @@ class CSProfileGenerator : public ProfileGenerator {
                                        ProfiledBinary *Binary);
   void populateInferredFunctionSamples();
 
+  // Profile summary to answer isHotCount and isColdCount queries.
+  std::unique_ptr<ProfileSummaryInfo> PSI;
+
 public:
   // Deduplicate adjacent repeated context sequences up to a given sequence
   // length. -1 means no size limit.


        


More information about the llvm-commits mailing list