[llvm] [SampleProfile] Add option to limit number of (indirect) call target and inlined callsites when reading a Sample Profile (PR #74677)

William Junda Huang via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 6 18:03:18 PST 2023


https://github.com/huangjd updated https://github.com/llvm/llvm-project/pull/74677

>From 264cab4cb58fdf7e089362db0ecf737391532fae Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Wed, 6 Dec 2023 23:22:15 +0000
Subject: [PATCH 1/2] [SampleProfile] Add option to limit number of (indirect)
 call target and inlined callsites when reading a Sample Profile

Sample profiles generated in production environments can contain entries
with a huge number of indirect call targets or inlined callsites due to
usages like the Listener pattern, CRTP, etc. This causes a combinatorial
blow-up when constructing the call graph from the profile for inlining,
slowing down compilation by a factor of 10 or more.

Since we don't actually inline indirect calls for more than a few call
targets, a limit is added to sample profile parsing so that it only
keeps the top N indirect call targets or inlined callsites, ranked by
sample count. The entries with the lowest counts are dropped first.

Use -sample-profile-call-target-max and
-sample-profile-inline-callsite-max to control the maximum number to keep;
the default is 3 when compiling, and a value of 0 disables the limit.

These options also work with llvm-profdata merge (where they default to
0, i.e. no limit), but they only control the input reader, not the
output after merging multiple profiles.
---
 llvm/include/llvm/ProfileData/SampleProf.h    |  34 +++++
 .../llvm/ProfileData/SampleProfReader.h       |  14 ++
 llvm/lib/ProfileData/SampleProfReader.cpp     |  94 +++++++++----
 llvm/lib/Transforms/IPO/SampleProfile.cpp     |  12 ++
 .../Inputs/sample-max-callsite.proftext       |  20 +++
 .../llvm-profdata/sample-max-callsite.test    | 124 ++++++++++++++++++
 llvm/tools/llvm-profdata/llvm-profdata.cpp    |  10 ++
 7 files changed, 285 insertions(+), 23 deletions(-)
 create mode 100644 llvm/test/tools/llvm-profdata/Inputs/sample-max-callsite.proftext
 create mode 100644 llvm/test/tools/llvm-profdata/sample-max-callsite.test

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index f001f5ee9d39b..e7bbc171c98e9 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -781,6 +781,14 @@ class FunctionSamples {
         Num, Weight);
   }
 
+  SampleRecord &getOrCreateBodySample(uint32_t LineOffset,
+                                      uint32_t Discriminator,
+                                      uint64_t Num, uint64_t Weight = 1) {
+    SampleRecord &Sample = BodySamples[LineLocation(LineOffset, Discriminator)];
+    Sample.addSamples(Num, Weight);
+    return Sample;
+  }
+
   sampleprof_error addCalledTargetSamples(uint32_t LineOffset,
                                           uint32_t Discriminator,
                                           FunctionId Func,
@@ -975,6 +983,32 @@ class FunctionSamples {
     return CallsiteSamples;
   }
 
+  /// For each location with inlined function samples, if the number of
+  /// functions exceeds ProfileInlineCallsiteMax, remove every function whose
+  /// total count is below that of the ProfileInlineCallsiteMax-th hottest one.
+  void trimCallsiteSamples(size_t ProfileInlineCallsiteMax) {
+    for (auto &CallsiteSample : CallsiteSamples) {
+      FunctionSamplesMap &FunctionSamples = CallsiteSample.second;
+      if (ProfileInlineCallsiteMax < FunctionSamples.size()) {
+        auto It = llvm::map_range(FunctionSamples,
+                                  [](FunctionSamplesMap::value_type &V){
+                                    return V.second.getTotalSamples();
+                                  });
+        std::vector<uint64_t> TotalSamples(It.begin(), It.end());
+        std::nth_element(TotalSamples.begin(),
+                         TotalSamples.begin() + ProfileInlineCallsiteMax - 1,
+                         TotalSamples.end(), std::greater<uint64_t>());
+        uint64_t Threshold = TotalSamples[ProfileInlineCallsiteMax - 1];
+        for (auto It = FunctionSamples.begin(); It != FunctionSamples.end();) {
+          if (It->second.getTotalSamples() < Threshold)
+            It = FunctionSamples.erase(It);
+          else
+            ++It;
+        }
+      }
+    }
+  }
+
   /// Return the maximum of sample counts in a function body. When SkipCallSite
   /// is false, which is the default, the return count includes samples in the
   /// inlined functions. When SkipCallSite is true, the return count only
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 9e8f543909cdb..f09dea4a01e0b 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -494,6 +494,10 @@ class SampleProfileReader {
 
   void setModule(const Module *Mod) { M = Mod; }
 
+  void setProfileCallTargetMax(size_t N) { ProfileCallTargetMax = N; }
+
+  void setProfileInlineCallsiteMax(size_t N) { ProfileInlineCallsiteMax = N; }
+
 protected:
   /// Map every function to its associated profile.
   ///
@@ -552,6 +556,16 @@ class SampleProfileReader {
   /// Whether the profile uses MD5 for Sample Contexts and function names. This
   /// can be one-way overriden by the user to force use MD5.
   bool ProfileIsMD5 = false;
+
+  /// Number of call targets to keep in a sample record. Only those with highest
+  /// count are kept. 0 = unlimited.
+  /// Same as ProfileCallTargetMax option from SampleProfile.cpp.
+  uint32_t ProfileCallTargetMax = 0;
+
+  /// Number of inlined callsites to keep in a line location. Only those with
+  /// highest count are kept. 0 = unlimited.
+  /// Same as ProfileInlineCallsiteMax option from SampleProfile.cpp.
+  uint32_t ProfileInlineCallsiteMax = 0;
 };
 
 class SampleProfileReaderText : public SampleProfileReader {
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index ed92713c2c627..7b4f604d80666 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -325,6 +325,16 @@ std::error_code SampleProfileReaderText::readImpl() {
   // top-level or nested function profile.
   uint32_t DepthMetadata = 0;
 
+  // Pop inline stack until size == Depth, handle ProfileInlineCallsiteMax here
+  // because the current FunctionSamples is done adding inlined callsites.
+  auto popInlineStack = [&](uint32_t Depth) {
+    while (InlineStack.size() > Depth) {
+      if (ProfileInlineCallsiteMax != 0)
+        InlineStack.back()->trimCallsiteSamples(ProfileInlineCallsiteMax);
+      InlineStack.pop_back();
+    }
+  };
+
   ProfileIsFS = ProfileIsFSDisciminator;
   FunctionSamples::ProfileIsFS = ProfileIsFS;
   for (; !LineIt.is_at_eof(); ++LineIt) {
@@ -358,7 +368,7 @@ std::error_code SampleProfileReaderText::readImpl() {
       FunctionSamples &FProfile = Profiles.Create(FContext);
       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
-      InlineStack.clear();
+      popInlineStack(0);
       InlineStack.push_back(&FProfile);
     } else {
       uint64_t NumSamples;
@@ -386,9 +396,7 @@ std::error_code SampleProfileReaderText::readImpl() {
       // Here we handle FS discriminators.
       Discriminator &= getDiscriminatorMask();
 
-      while (InlineStack.size() > Depth) {
-        InlineStack.pop_back();
-      }
+      popInlineStack(Depth);
       switch (LineTy) {
       case LineType::CallSiteProfile: {
         FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
@@ -400,15 +408,26 @@ std::error_code SampleProfileReaderText::readImpl() {
         break;
       }
       case LineType::BodyProfile: {
-        while (InlineStack.size() > Depth) {
-          InlineStack.pop_back();
-        }
         FunctionSamples &FProfile = *InlineStack.back();
-        for (const auto &name_count : TargetCountMap) {
-          MergeResult(Result, FProfile.addCalledTargetSamples(
-                                  LineOffset, Discriminator,
-                                  FunctionId(name_count.first),
-                                  name_count.second));
+        if (ProfileCallTargetMax != 0)  {
+          std::multimap<uint64_t, FunctionId> CallTargets;
+          for (const auto &CallTarget : TargetCountMap) {
+            CallTargets.emplace(CallTarget.second, CallTarget.first);
+            if (CallTargets.size() > ProfileCallTargetMax)
+              CallTargets.erase(CallTargets.begin());
+          }
+          for (const auto &CallTarget : CallTargets) {
+            MergeResult(Result, FProfile.addCalledTargetSamples(
+                                    LineOffset, Discriminator,
+                                    CallTarget.second, CallTarget.first));
+          }
+        } else {
+          for (const auto &name_count : TargetCountMap) {
+            MergeResult(Result, FProfile.addCalledTargetSamples(
+                                    LineOffset, Discriminator,
+                                    FunctionId(name_count.first),
+                                    name_count.second));
+          }
         }
         MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
                                                     NumSamples));
@@ -430,6 +449,7 @@ std::error_code SampleProfileReaderText::readImpl() {
       }
     }
   }
+  popInlineStack(0);
 
   assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
          "Cannot have both context-sensitive and regular profile");
@@ -604,20 +624,45 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
     // Here we handle FS discriminators:
     uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
 
-    for (uint32_t J = 0; J < *NumCalls; ++J) {
-      auto CalledFunction(readStringFromTable());
-      if (std::error_code EC = CalledFunction.getError())
-        return EC;
+    SampleRecord &Sample =
+        FProfile.getOrCreateBodySample(*LineOffset, DiscriminatorVal,
+                                       *NumSamples);
+
+    if (ProfileCallTargetMax != 0) {
+      // ProfileCallTargetMax is only used by SampleProfile.cpp at compilation,
+      // where the top ProfileCallTargetMax mostly called targets are kept and
+      // others are dropped.
+      std::multimap<uint64_t, FunctionId> CallTargets;
+      for (uint32_t J = 0; J < *NumCalls; ++J) {
+        auto CalledFunction(readStringFromTable());
+        if (std::error_code EC = CalledFunction.getError())
+          return EC;
 
-      auto CalledFunctionSamples = readNumber<uint64_t>();
-      if (std::error_code EC = CalledFunctionSamples.getError())
-        return EC;
+        auto CalledFunctionSamples = readNumber<uint64_t>();
+        if (std::error_code EC = CalledFunctionSamples.getError())
+          return EC;
 
-      FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal,
-                                      *CalledFunction, *CalledFunctionSamples);
-    }
+        CallTargets.emplace(*CalledFunctionSamples, *CalledFunction);
+        if (CallTargets.size() > ProfileCallTargetMax)
+          CallTargets.erase(CallTargets.begin());
+      }
+
+      for (auto & CallTarget : CallTargets) {
+        Sample.addCalledTarget(CallTarget.second, CallTarget.first);
+      }
+    } else {
+      for (uint32_t J = 0; J < *NumCalls; ++J) {
+        auto CalledFunction(readStringFromTable());
+        if (std::error_code EC = CalledFunction.getError())
+          return EC;
 
-    FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples);
+        auto CalledFunctionSamples = readNumber<uint64_t>();
+        if (std::error_code EC = CalledFunctionSamples.getError())
+          return EC;
+
+        Sample.addCalledTarget(*CalledFunction, *CalledFunctionSamples);
+      }
+    }
   }
 
   // Read all the samples for inlined function calls.
@@ -648,6 +693,9 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
       return EC;
   }
 
+  if (ProfileInlineCallsiteMax != 0)
+    FProfile.trimCallsiteSamples(ProfileInlineCallsiteMax);
+
   return sampleprof_error::success;
 }
 
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 6c6f0a0eca72a..e6d87a6b3d65a 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -213,6 +213,16 @@ cl::opt<int> ProfileInlineLimitMax(
     cl::desc("The upper bound of size growth limit for "
              "proirity-based sample profile loader inlining."));
 
+static cl::opt<uint32_t> ProfileCallTargetMax(
+    "sample-profile-call-target-max", cl::Hidden, cl::init(3),
+    cl::desc("In a sample record, only keep top N frequent indirect call "
+             "targets at the same location."));
+
+static cl::opt<uint32_t> ProfileInlineCallsiteMax(
+    "sample-profile-inline-callsite-max", cl::Hidden, cl::init(3),
+    cl::desc("In an inlined callsite map, only keep top N frequently inlined "
+             "callsites at the same location."));
+
 cl::opt<int> SampleHotCallSiteThreshold(
     "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
     cl::desc("Hot callsite threshold for proirity-based sample profile loader "
@@ -2015,6 +2025,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
   // set module before reading the profile so reader may be able to only
   // read the function profiles which are used by the current module.
   Reader->setModule(&M);
+  Reader->setProfileCallTargetMax(ProfileCallTargetMax);
+  Reader->setProfileInlineCallsiteMax(ProfileInlineCallsiteMax);
   if (std::error_code EC = Reader->read()) {
     std::string Msg = "profile reading failed: " + EC.message();
     Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
diff --git a/llvm/test/tools/llvm-profdata/Inputs/sample-max-callsite.proftext b/llvm/test/tools/llvm-profdata/Inputs/sample-max-callsite.proftext
new file mode 100644
index 0000000000000..c59e87ec254dc
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/sample-max-callsite.proftext
@@ -0,0 +1,20 @@
+main:184019:0
+ 4: 534
+ 4.2: 534
+ 5: 1075
+ 5.1: 1075
+ 6: 2080
+ 7: 534 _Z3bazi:1
+ 9: 2064 _Z3bari:1471 _Z3fooi:631 _Z3gooi:123 _Z3hooi:999
+ 10: inline1:1000
+  1: 1000
+  2: 1001 func1:10 func2:30 func3:20
+ 10: inline2:2000
+  1: 2000
+  3: inlineinline1:3
+  3: inlineinline2:2
+  3: inlineinline3:1
+ 10: inline3:45
+ 10.1: inline4:1
+_Z3bari:20301:1437
+ 1: 1437
diff --git a/llvm/test/tools/llvm-profdata/sample-max-callsite.test b/llvm/test/tools/llvm-profdata/sample-max-callsite.test
new file mode 100644
index 0000000000000..723f6df75548b
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/sample-max-callsite.test
@@ -0,0 +1,124 @@
+# Test options sample-profile-call-target-max and
+# sample-profile-inline-callsite-max in llvm-profdata. Same options are
+# available in clang when compiling using a sample profile.
+
+RUN: llvm-profdata merge -text -sample -sample-profile-call-target-max=3 %p/Inputs/sample-max-callsite.proftext | FileCheck %s --check-prefix=SHOW30
+RUN: llvm-profdata merge -text -sample -sample-profile-call-target-max=2 %p/Inputs/sample-max-callsite.proftext | FileCheck %s --check-prefix=SHOW20
+RUN: llvm-profdata merge -text -sample -sample-profile-inline-callsite-max=2 %p/Inputs/sample-max-callsite.proftext | FileCheck %s --check-prefix=SHOW02
+RUN: llvm-profdata merge -text -sample -sample-profile-inline-callsite-max=1 %p/Inputs/sample-max-callsite.proftext | FileCheck %s --check-prefix=SHOW01
+RUN: llvm-profdata merge -text -sample -sample-profile-call-target-max=1 -sample-profile-inline-callsite-max=1 %p/Inputs/sample-max-callsite.proftext | FileCheck %s --check-prefix=SHOW11
+
+RUN: llvm-profdata merge -text -sample -sample-profile-call-target-max=0 -sample-profile-inline-callsite-max=0 %p/Inputs/sample-max-callsite.proftext | FileCheck %s --check-prefix=SHOW
+RUN: llvm-profdata merge -text -sample -sample-profile-call-target-max=999 -sample-profile-inline-callsite-max=999 %p/Inputs/sample-max-callsite.proftext  | FileCheck %s --check-prefix=SHOW
+
+SHOW30: main:184019:0
+SHOW30-NEXT:  4: 534
+SHOW30-NEXT:  4.2: 534
+SHOW30-NEXT:  5: 1075
+SHOW30-NEXT:  5.1: 1075
+SHOW30-NEXT:  6: 2080
+SHOW30-NEXT:  7: 534 _Z3bazi:1
+SHOW30-NEXT:  9: 2064 _Z3bari:1471 _Z3hooi:999 _Z3fooi:631
+SHOW30-NEXT:  10: inline1:1000
+SHOW30-NEXT:   1: 1000
+SHOW30-NEXT:   2: 1001 func2:30 func3:20 func1:10
+SHOW30-NEXT:  10: inline2:2000
+SHOW30-NEXT:   1: 2000
+SHOW30-NEXT:   3: inlineinline1:3
+SHOW30-NEXT:   3: inlineinline2:2
+SHOW30-NEXT:   3: inlineinline3:1
+SHOW30-NEXT:  10: inline3:45
+SHOW30-NEXT:  10.1: inline4:1
+SHOW30-NEXT: _Z3bari:20301:1437
+SHOW30-NEXT:  1: 1437
+
+SHOW20: main:184019:0
+SHOW20-NEXT:  4: 534
+SHOW20-NEXT:  4.2: 534
+SHOW20-NEXT:  5: 1075
+SHOW20-NEXT:  5.1: 1075
+SHOW20-NEXT:  6: 2080
+SHOW20-NEXT:  7: 534 _Z3bazi:1
+SHOW20-NEXT:  9: 2064 _Z3bari:1471 _Z3hooi:999
+SHOW20-NEXT:  10: inline1:1000
+SHOW20-NEXT:   1: 1000
+SHOW20-NEXT:   2: 1001 func2:30 func3:20
+SHOW20-NEXT:  10: inline2:2000
+SHOW20-NEXT:   1: 2000
+SHOW20-NEXT:   3: inlineinline1:3
+SHOW20-NEXT:   3: inlineinline2:2
+SHOW20-NEXT:   3: inlineinline3:1
+SHOW20-NEXT:  10: inline3:45
+SHOW20-NEXT:  10.1: inline4:1
+SHOW20-NEXT: _Z3bari:20301:1437
+SHOW20-NEXT:  1: 1437
+
+SHOW02: main:184019:0
+SHOW02-NEXT:  4: 534
+SHOW02-NEXT:  4.2: 534
+SHOW02-NEXT:  5: 1075
+SHOW02-NEXT:  5.1: 1075
+SHOW02-NEXT:  6: 2080
+SHOW02-NEXT:  7: 534 _Z3bazi:1
+SHOW02-NEXT:  9: 2064 _Z3bari:1471 _Z3hooi:999 _Z3fooi:631 _Z3gooi:123
+SHOW02-NEXT:  10: inline1:1000
+SHOW02-NEXT:   1: 1000
+SHOW02-NEXT:   2: 1001 func2:30 func3:20 func1:10
+SHOW02-NEXT:  10: inline2:2000
+SHOW02-NEXT:   1: 2000
+SHOW02-NEXT:   3: inlineinline1:3
+SHOW02-NEXT:   3: inlineinline2:2
+SHOW02-NEXT:  10.1: inline4:1
+SHOW02-NEXT: _Z3bari:20301:1437
+SHOW02-NEXT:  1: 1437
+
+SHOW01: main:184019:0
+SHOW01-NEXT:  4: 534
+SHOW01-NEXT:  4.2: 534
+SHOW01-NEXT:  5: 1075
+SHOW01-NEXT:  5.1: 1075
+SHOW01-NEXT:  6: 2080
+SHOW01-NEXT:  7: 534 _Z3bazi:1
+SHOW01-NEXT:  9: 2064 _Z3bari:1471 _Z3hooi:999 _Z3fooi:631 _Z3gooi:123
+SHOW01-NEXT:  10: inline2:2000
+SHOW01-NEXT:   1: 2000
+SHOW01-NEXT:   3: inlineinline1:3
+SHOW01-NEXT:  10.1: inline4:1
+SHOW01-NEXT: _Z3bari:20301:1437
+SHOW01-NEXT:  1: 1437
+
+SHOW11: main:184019:0
+SHOW11-NEXT:  4: 534
+SHOW11-NEXT:  4.2: 534
+SHOW11-NEXT:  5: 1075
+SHOW11-NEXT:  5.1: 1075
+SHOW11-NEXT:  6: 2080
+SHOW11-NEXT:  7: 534 _Z3bazi:1
+SHOW11-NEXT:  9: 2064 _Z3bari:1471
+SHOW11-NEXT:  10: inline2:2000
+SHOW11-NEXT:   1: 2000
+SHOW11-NEXT:   3: inlineinline1:3
+SHOW11-NEXT:  10.1: inline4:1
+SHOW11-NEXT: _Z3bari:20301:1437
+SHOW11-NEXT:  1: 1437
+
+SHOW: main:184019:0
+SHOW-NEXT:  4: 534
+SHOW-NEXT:  4.2: 534
+SHOW-NEXT:  5: 1075
+SHOW-NEXT:  5.1: 1075
+SHOW-NEXT:  6: 2080
+SHOW-NEXT:  7: 534 _Z3bazi:1
+SHOW-NEXT:  9: 2064 _Z3bari:1471 _Z3hooi:999 _Z3fooi:631 _Z3gooi:123
+SHOW-NEXT:  10: inline1:1000
+SHOW-NEXT:   1: 1000
+SHOW-NEXT:   2: 1001 func2:30 func3:20 func1:10
+SHOW-NEXT:  10: inline2:2000
+SHOW-NEXT:   1: 2000
+SHOW-NEXT:   3: inlineinline1:3
+SHOW-NEXT:   3: inlineinline2:2
+SHOW-NEXT:   3: inlineinline3:1
+SHOW-NEXT:  10: inline3:45
+SHOW-NEXT:  10.1: inline4:1
+SHOW-NEXT: _Z3bari:20301:1437
+SHOW-NEXT:  1: 1437
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 63e34d81f1892..5a89185465336 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -175,6 +175,14 @@ cl::opt<bool> CompressAllSections(
     cl::sub(MergeSubcommand),
     cl::desc("Compress all sections when writing the profile (only "
              "meaningful for -extbinary)"));
+cl::opt<uint32_t> ProfileCallTargetMax(
+    "sample-profile-call-target-max", cl::Hidden, cl::init(0),
+    cl::desc("While reading a profile, in a sample record, only keep top N "
+             "frequent indirect call targets at the same location."));
+cl::opt<uint32_t> ProfileInlineCallsiteMax(
+    "sample-profile-inline-callsite-max", cl::Hidden, cl::init(0),
+    cl::desc("While reading a profile, in an inlined callsite map, only keep "
+             "top N frequently inlined callsites at the same location."));
 cl::opt<bool> SampleMergeColdContext(
     "sample-merge-cold-context", cl::init(false), cl::Hidden,
     cl::sub(MergeSubcommand),
@@ -1377,6 +1385,8 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
     // merged profile map.
     Readers.push_back(std::move(ReaderOrErr.get()));
     const auto Reader = Readers.back().get();
+    Reader->setProfileCallTargetMax(ProfileCallTargetMax);
+    Reader->setProfileInlineCallsiteMax(ProfileInlineCallsiteMax);
     if (std::error_code EC = Reader->read()) {
       warnOrExitGivenError(FailMode, EC, Input.Filename);
       Readers.pop_back();

>From d032a7311bebdc9435cdd4a090c2ab17a3441fda Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Thu, 7 Dec 2023 02:03:01 +0000
Subject: [PATCH 2/2] Format

---
 llvm/include/llvm/ProfileData/SampleProf.h |  6 +++---
 llvm/lib/ProfileData/SampleProfReader.cpp  | 17 ++++++++---------
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index e7bbc171c98e9..08647f5cfb6dd 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -782,8 +782,8 @@ class FunctionSamples {
   }
 
   SampleRecord &getOrCreateBodySample(uint32_t LineOffset,
-                                      uint32_t Discriminator,
-                                      uint64_t Num, uint64_t Weight = 1) {
+                                      uint32_t Discriminator, uint64_t Num,
+                                      uint64_t Weight = 1) {
     SampleRecord &Sample = BodySamples[LineLocation(LineOffset, Discriminator)];
     Sample.addSamples(Num, Weight);
     return Sample;
@@ -991,7 +991,7 @@ class FunctionSamples {
       FunctionSamplesMap &FunctionSamples = CallsiteSample.second;
       if (ProfileInlineCallsiteMax < FunctionSamples.size()) {
         auto It = llvm::map_range(FunctionSamples,
-                                  [](FunctionSamplesMap::value_type &V){
+                                  [](FunctionSamplesMap::value_type &V) {
                                     return V.second.getTotalSamples();
                                   });
         std::vector<uint64_t> TotalSamples(It.begin(), It.end());
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 7b4f604d80666..515478db3198c 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -409,7 +409,7 @@ std::error_code SampleProfileReaderText::readImpl() {
       }
       case LineType::BodyProfile: {
         FunctionSamples &FProfile = *InlineStack.back();
-        if (ProfileCallTargetMax != 0)  {
+        if (ProfileCallTargetMax != 0) {
           std::multimap<uint64_t, FunctionId> CallTargets;
           for (const auto &CallTarget : TargetCountMap) {
             CallTargets.emplace(CallTarget.second, CallTarget.first);
@@ -423,10 +423,10 @@ std::error_code SampleProfileReaderText::readImpl() {
           }
         } else {
           for (const auto &name_count : TargetCountMap) {
-            MergeResult(Result, FProfile.addCalledTargetSamples(
-                                    LineOffset, Discriminator,
-                                    FunctionId(name_count.first),
-                                    name_count.second));
+            MergeResult(Result,
+                        FProfile.addCalledTargetSamples(
+                            LineOffset, Discriminator,
+                            FunctionId(name_count.first), name_count.second));
           }
         }
         MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
@@ -624,9 +624,8 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
     // Here we handle FS discriminators:
     uint32_t DiscriminatorVal = (*Discriminator) & getDiscriminatorMask();
 
-    SampleRecord &Sample =
-        FProfile.getOrCreateBodySample(*LineOffset, DiscriminatorVal,
-                                       *NumSamples);
+    SampleRecord &Sample = FProfile.getOrCreateBodySample(
+        *LineOffset, DiscriminatorVal, *NumSamples);
 
     if (ProfileCallTargetMax != 0) {
       // ProfileCallTargetMax is only used by SampleProfile.cpp at compilation,
@@ -647,7 +646,7 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
           CallTargets.erase(CallTargets.begin());
       }
 
-      for (auto & CallTarget : CallTargets) {
+      for (auto &CallTarget : CallTargets) {
         Sample.addCalledTarget(CallTarget.second, CallTarget.first);
       }
     } else {



More information about the llvm-commits mailing list