[llvm] 5b72d0e - [llvm-profdata] Add option to cap profile output size

William Huang via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 9 14:01:20 PST 2023


Author: William Huang
Date: 2023-01-09T22:01:10Z
New Revision: 5b72d0e4f5eeb8f90c744cac8e0728cffeca61a9

URL: https://github.com/llvm/llvm-project/commit/5b72d0e4f5eeb8f90c744cac8e0728cffeca61a9
DIFF: https://github.com/llvm/llvm-project/commit/5b72d0e4f5eeb8f90c744cac8e0728cffeca61a9.diff

LOG: [llvm-profdata] Add option to cap profile output size

Allow user to specify `--output-size-limit=n` to cap the size of generated profile to be strictly under n. Functions with the lowest total sample count are dropped first if necessary. Due to using a heuristic, excessive functions may be dropped to satisfy the size requirement

Reviewed By: snehasish

Differential Revision: https://reviews.llvm.org/D139603

Added: 
    llvm/test/tools/llvm-profdata/output-size-limit.test

Modified: 
    llvm/include/llvm/ProfileData/SampleProfWriter.h
    llvm/lib/ProfileData/SampleProfWriter.cpp
    llvm/tools/llvm-profdata/llvm-profdata.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h
index b1ed0335e9c93..506c902f3d129 100644
--- a/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -35,6 +35,56 @@ enum SectionLayout {
   NumOfLayout,
 };
 
+/// When writing a profile with size limit, user may want to use a different
+/// strategy to reduce function count other than dropping functions with fewest
+/// samples first. In this case a class implementing the same interfaces should
+/// be provided to SampleProfileWriter::writeWithSizeLimit().
+class FunctionPruningStrategy {
+protected:
+  SampleProfileMap &ProfileMap;
+  size_t OutputSizeLimit;
+
+public:
+  /// \p ProfileMap A reference to the original profile map. It will be modified
+  /// by Erase().
+  /// \p OutputSizeLimit Size limit in bytes of the output profile. This is
+  /// necessary to estimate how many functions to remove.
+  FunctionPruningStrategy(SampleProfileMap &ProfileMap, size_t OutputSizeLimit)
+      : ProfileMap(ProfileMap), OutputSizeLimit(OutputSizeLimit) {}
+
+  virtual ~FunctionPruningStrategy() = default;
+
+  /// SampleProfileWriter::writeWithSizeLimit() calls this after every write
+  /// iteration if the output size still exceeds the limit. This function
+  /// should erase some functions from the profile map so that the writer tries
+  /// to write the profile again with fewer functions. At least 1 entry from the
+  /// profile map must be erased.
+  ///
+  /// \p CurrentOutputSize Number of bytes in the output if current profile map
+  /// is written.
+  virtual void Erase(size_t CurrentOutputSize) = 0;
+};
+
+class DefaultFunctionPruningStrategy : public FunctionPruningStrategy {
+  std::vector<NameFunctionSamples> SortedFunctions;
+
+public:
+  DefaultFunctionPruningStrategy(SampleProfileMap &ProfileMap,
+                                 size_t OutputSizeLimit);
+
+  /// In this default implementation, functions with fewest samples are dropped
+  /// first. Since the exact size of the output cannot be easily calculated due
+  /// to compression, we use a heuristic to remove as many functions as
+  /// necessary but not too many, aiming to minimize the number of write
+  /// iterations.
+  /// Empirically, functions with larger total sample count contain linearly
+  /// more sample entries, meaning it takes linearly more space to write them.
+  /// The cumulative length is therefore quadratic if all functions are sorted
+  /// by total sample count.
+  /// TODO: Find better heuristic.
+  void Erase(size_t CurrentOutputSize) override;
+};
+
 /// Sample-based profile writer. Base class.
 class SampleProfileWriter {
 public:
@@ -50,6 +100,17 @@ class SampleProfileWriter {
   /// \returns status code of the file update operation.
   virtual std::error_code write(const SampleProfileMap &ProfileMap);
 
+  /// Write sample profiles up to given size limit, using the pruning strategy
+  /// to drop some functions if necessary.
+  ///
+  /// \returns status code of the file update operation.
+  template <typename FunctionPruningStrategy = DefaultFunctionPruningStrategy>
+  std::error_code writeWithSizeLimit(SampleProfileMap &ProfileMap,
+                                     size_t OutputSizeLimit) {
+    FunctionPruningStrategy Strategy(ProfileMap, OutputSizeLimit);
+    return writeWithSizeLimitInternal(ProfileMap, OutputSizeLimit, &Strategy);
+  }
+
   raw_ostream &getOutputStream() { return *OutputStream; }
 
   /// Profile writer factory.
@@ -79,6 +140,10 @@ class SampleProfileWriter {
   // Write function profiles to the profile file.
   virtual std::error_code writeFuncProfiles(const SampleProfileMap &ProfileMap);
 
+  std::error_code writeWithSizeLimitInternal(SampleProfileMap &ProfileMap,
+                                             size_t OutputSizeLimit,
+                                             FunctionPruningStrategy *Strategy);
+
   /// Output stream where to emit the profile to.
   std::unique_ptr<raw_ostream> OutputStream;
 

diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 093790afe2d66..e52c7bac70311 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <cmath>
 #include <cstdint>
 #include <memory>
 #include <set>
@@ -37,9 +38,96 @@
 #include <utility>
 #include <vector>
 
+#define DEBUG_TYPE "llvm-profdata"
+
 using namespace llvm;
 using namespace sampleprof;
 
+namespace llvm {
+namespace support {
+namespace endian {
+namespace {
+
+// Adapter class to llvm::support::endian::Writer for pwrite().
+struct SeekableWriter {
+  raw_pwrite_stream &OS;
+  endianness Endian;
+  SeekableWriter(raw_pwrite_stream &OS, endianness Endian)
+      : OS(OS), Endian(Endian) {}
+
+  template <typename ValueType>
+  void pwrite(ValueType Val, size_t Offset) {
+    std::string StringBuf;
+    raw_string_ostream SStream(StringBuf);
+    Writer(SStream, Endian).write(Val);
+    OS.pwrite(StringBuf.data(), StringBuf.size(), Offset);
+  }
+};
+
+} // namespace
+} // namespace endian
+} // namespace support
+} // namespace llvm
+
+DefaultFunctionPruningStrategy::DefaultFunctionPruningStrategy(
+    SampleProfileMap &ProfileMap, size_t OutputSizeLimit)
+    : FunctionPruningStrategy(ProfileMap, OutputSizeLimit) {
+  sortFuncProfiles(ProfileMap, SortedFunctions);
+}
+
+void DefaultFunctionPruningStrategy::Erase(size_t CurrentOutputSize) {
+  double D = (double)OutputSizeLimit / CurrentOutputSize;
+  size_t NewSize = (size_t)round(ProfileMap.size() * D * D);
+  size_t NumToRemove = ProfileMap.size() - NewSize;
+  if (NumToRemove < 1)
+    NumToRemove = 1;
+
+  assert(NumToRemove <= SortedFunctions.size());
+  llvm::for_each(
+      llvm::make_range(SortedFunctions.begin() + SortedFunctions.size() -
+                           NumToRemove,
+                       SortedFunctions.end()),
+      [&](const NameFunctionSamples &E) { ProfileMap.erase(E.first); });
+  SortedFunctions.resize(SortedFunctions.size() - NumToRemove);
+}
+
+std::error_code SampleProfileWriter::writeWithSizeLimitInternal(
+    SampleProfileMap &ProfileMap, size_t OutputSizeLimit,
+    FunctionPruningStrategy *Strategy) {
+  if (OutputSizeLimit == 0)
+    return write(ProfileMap);
+
+  size_t OriginalFunctionCount = ProfileMap.size();
+
+  SmallVector<char> StringBuffer;
+  std::unique_ptr<raw_ostream> BufferStream(
+      new raw_svector_ostream(StringBuffer));
+  OutputStream.swap(BufferStream);
+
+  if (std::error_code EC = write(ProfileMap))
+    return EC;
+  size_t IterationCount = 0;
+  while (StringBuffer.size() > OutputSizeLimit) {
+    Strategy->Erase(StringBuffer.size());
+
+    if (ProfileMap.size() == 0)
+      return sampleprof_error::too_large;
+
+    StringBuffer.clear();
+    OutputStream.reset(new raw_svector_ostream(StringBuffer));
+    if (std::error_code EC = write(ProfileMap))
+      return EC;
+    IterationCount++;
+  }
+
+  OutputStream.swap(BufferStream);
+  OutputStream->write(StringBuffer.data(), StringBuffer.size());
+  LLVM_DEBUG(dbgs() << "Profile originally has " << OriginalFunctionCount
+                    << " functions, reduced to " << ProfileMap.size() << " in "
+                    << IterationCount << " iterations\n");
+  return sampleprof_error::success;
+}
+
 std::error_code
 SampleProfileWriter::writeFuncProfiles(const SampleProfileMap &ProfileMap) {
   std::vector<NameFunctionSamples> V;
@@ -116,6 +204,12 @@ std::error_code SampleProfileWriterExtBinaryBase::addNewSection(
 
 std::error_code
 SampleProfileWriterExtBinaryBase::write(const SampleProfileMap &ProfileMap) {
+  // When calling write on a different profile map, existing states should be
+  // cleared.
+  NameTable.clear();
+  CSNameTable.clear();
+  SecHdrTable.clear();
+
   if (std::error_code EC = writeHeader(ProfileMap))
     return EC;
 
@@ -605,14 +699,10 @@ std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() {
   auto &OS = *OutputStream;
 
   // Fill the slot remembered by TableOffset with the offset of FuncOffsetTable.
-  auto &OFS = static_cast<raw_fd_ostream &>(OS);
   uint64_t FuncOffsetTableStart = OS.tell();
-  if (OFS.seek(TableOffset) == (uint64_t)-1)
-    return sampleprof_error::ostream_seek_unsupported;
-  support::endian::Writer Writer(*OutputStream, support::little);
-  Writer.write(FuncOffsetTableStart);
-  if (OFS.seek(FuncOffsetTableStart) == (uint64_t)-1)
-    return sampleprof_error::ostream_seek_unsupported;
+  support::endian::SeekableWriter Writer(static_cast<raw_pwrite_stream &>(OS),
+                                         support::little);
+  Writer.pwrite(FuncOffsetTableStart, TableOffset);
 
   // Write out the table size.
   encodeULEB128(FuncOffsetTable.size(), OS);
@@ -650,6 +740,10 @@ SampleProfileWriterBinary::writeMagicIdent(SampleProfileFormat Format) {
 
 std::error_code
 SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) {
+  // When calling write on a different profile map, existing names should be
+  // cleared.
+  NameTable.clear();
+
   writeMagicIdent(Format);
 
   computeSummary(ProfileMap);
@@ -690,14 +784,6 @@ void SampleProfileWriterExtBinaryBase::allocSecHdrTable() {
 }
 
 std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
-  auto &OFS = static_cast<raw_fd_ostream &>(*OutputStream);
-  uint64_t Saved = OutputStream->tell();
-
-  // Set OutputStream to the location saved in SecHdrTableOffset.
-  if (OFS.seek(SecHdrTableOffset) == (uint64_t)-1)
-    return sampleprof_error::ostream_seek_unsupported;
-  support::endian::Writer Writer(*OutputStream, support::little);
-
   assert(SecHdrTable.size() == SectionHdrLayout.size() &&
          "SecHdrTable entries doesn't match SectionHdrLayout");
   SmallVector<uint32_t, 16> IndexMap(SecHdrTable.size(), -1);
@@ -714,21 +800,23 @@ std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
   // needs to be computed after SecLBRProfile (the order in SecHdrTable),
   // but it needs to be read before SecLBRProfile (the order in
   // SectionHdrLayout). So we use IndexMap above to switch the order.
+  support::endian::SeekableWriter Writer(
+      static_cast<raw_pwrite_stream &>(*OutputStream), support::little);
   for (uint32_t LayoutIdx = 0; LayoutIdx < SectionHdrLayout.size();
        LayoutIdx++) {
     assert(IndexMap[LayoutIdx] < SecHdrTable.size() &&
            "Incorrect LayoutIdx in SecHdrTable");
     auto Entry = SecHdrTable[IndexMap[LayoutIdx]];
-    Writer.write(static_cast<uint64_t>(Entry.Type));
-    Writer.write(static_cast<uint64_t>(Entry.Flags));
-    Writer.write(static_cast<uint64_t>(Entry.Offset));
-    Writer.write(static_cast<uint64_t>(Entry.Size));
+    Writer.pwrite(static_cast<uint64_t>(Entry.Type),
+                  SecHdrTableOffset + 4 * LayoutIdx * sizeof(uint64_t));
+    Writer.pwrite(static_cast<uint64_t>(Entry.Flags),
+                  SecHdrTableOffset + (4 * LayoutIdx + 1) * sizeof(uint64_t));
+    Writer.pwrite(static_cast<uint64_t>(Entry.Offset),
+                  SecHdrTableOffset + (4 * LayoutIdx + 2) * sizeof(uint64_t));
+    Writer.pwrite(static_cast<uint64_t>(Entry.Size),
+                  SecHdrTableOffset + (4 * LayoutIdx + 3) * sizeof(uint64_t));
   }
 
-  // Reset OutputStream.
-  if (OFS.seek(Saved) == (uint64_t)-1)
-    return sampleprof_error::ostream_seek_unsupported;
-
   return sampleprof_error::success;
 }
 

diff --git a/llvm/test/tools/llvm-profdata/output-size-limit.test b/llvm/test/tools/llvm-profdata/output-size-limit.test
new file mode 100644
index 0000000000000..bdecae0149c29
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/output-size-limit.test
@@ -0,0 +1,119 @@
+Tests for output-size-limit option. Functions with least sample count are dropped.
+
+1- No effect if output size limit >= original size
+RUN: llvm-profdata merge --sample --text --output-size-limit=212 %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=TEST_TEXT1
+TEST_TEXT1: main:184019:0
+TEST_TEXT1-NEXT:  4: 534
+TEST_TEXT1-NEXT:  4.2: 534
+TEST_TEXT1-NEXT:  5: 1075
+TEST_TEXT1-NEXT:  5.1: 1075
+TEST_TEXT1-NEXT:  6: 2080
+TEST_TEXT1-NEXT:  7: 534
+TEST_TEXT1-NEXT:  9: 2064 _Z3bari:1471 _Z3fooi:631
+TEST_TEXT1-NEXT:  10: inline1:1000
+TEST_TEXT1-NEXT:   1: 1000
+TEST_TEXT1-NEXT:  10: inline2:2000
+TEST_TEXT1-NEXT:   1: 2000
+TEST_TEXT1-NEXT: _Z3bari:20301:1437
+TEST_TEXT1-NEXT:  1: 1437
+TEST_TEXT1-NEXT: _Z3fooi:7711:610
+TEST_TEXT1-NEXT:  1: 610
+
+2- 1 function dropped
+RUN: llvm-profdata merge --sample --text --output-size-limit=211 %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=TEST_TEXT2
+RUN: llvm-profdata merge --sample --text --output-size-limit=187 %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=TEST_TEXT2
+TEST_TEXT2: main:184019:0
+TEST_TEXT2-NEXT:  4: 534
+TEST_TEXT2-NEXT:  4.2: 534
+TEST_TEXT2-NEXT:  5: 1075
+TEST_TEXT2-NEXT:  5.1: 1075
+TEST_TEXT2-NEXT:  6: 2080
+TEST_TEXT2-NEXT:  7: 534
+TEST_TEXT2-NEXT:  9: 2064 _Z3bari:1471 _Z3fooi:631
+TEST_TEXT2-NEXT:  10: inline1:1000
+TEST_TEXT2-NEXT:   1: 1000
+TEST_TEXT2-NEXT:  10: inline2:2000
+TEST_TEXT2-NEXT:   1: 2000
+TEST_TEXT2-NEXT: _Z3bari:20301:1437
+TEST_TEXT2-NEXT:  1: 1437
+
+3- 2 functions dropped
+RUN: llvm-profdata merge --sample --text --output-size-limit=170 %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=TEST_TEXT3
+TEST_TEXT3: main:184019:0
+TEST_TEXT3-NEXT:  4: 534
+TEST_TEXT3-NEXT:  4.2: 534
+TEST_TEXT3-NEXT:  5: 1075
+TEST_TEXT3-NEXT:  5.1: 1075
+TEST_TEXT3-NEXT:  6: 2080
+TEST_TEXT3-NEXT:  7: 534
+TEST_TEXT3-NEXT:  9: 2064 _Z3bari:1471 _Z3fooi:631
+TEST_TEXT3-NEXT:  10: inline1:1000
+TEST_TEXT3-NEXT:   1: 1000
+TEST_TEXT3-NEXT:  10: inline2:2000
+TEST_TEXT3-NEXT:   1: 2000
+
+4- All functions dropped, should report an error
+RUN: not llvm-profdata merge --sample --text --output-size-limit=158 %p/Inputs/sample-profile.proftext 2>&1 | FileCheck %s --check-prefix=INVALID1
+INVALID1: error: Too much profile data
+
+5- ExtBinary form, no function dropped. Check output size and file content converted back to text
+RUN: llvm-profdata merge --sample --extbinary --output-size-limit=489  %p/Inputs/sample-profile.proftext -o %t.output
+RUN: test $(stat -c %%s %t.output) -le 489
+RUN: llvm-profdata merge --sample --text %t.output | FileCheck %s --check-prefix=TEST_EXTBINARY1
+TEST_EXTBINARY1: main:184019:0
+TEST_EXTBINARY1-NEXT:  4: 534
+TEST_EXTBINARY1-NEXT:  4.2: 534
+TEST_EXTBINARY1-NEXT:  5: 1075
+TEST_EXTBINARY1-NEXT:  5.1: 1075
+TEST_EXTBINARY1-NEXT:  6: 2080
+TEST_EXTBINARY1-NEXT:  7: 534
+TEST_EXTBINARY1-NEXT:  9: 2064 _Z3bari:1471 _Z3fooi:631
+TEST_EXTBINARY1-NEXT:  10: inline1:1000
+TEST_EXTBINARY1-NEXT:   1: 1000
+TEST_EXTBINARY1-NEXT:  10: inline2:2000
+TEST_EXTBINARY1-NEXT:   1: 2000
+TEST_EXTBINARY1-NEXT: _Z3bari:20301:1437
+TEST_EXTBINARY1-NEXT:  1: 1437
+TEST_EXTBINARY1-NEXT: _Z3fooi:7711:610
+TEST_EXTBINARY1-NEXT:  1: 610
+
+6- ExtBinary form, 1 function dropped
+RUN: llvm-profdata merge --sample --extbinary --output-size-limit=488  %p/Inputs/sample-profile.proftext -o %t.output
+RUN: test $(stat -c %%s %t.output) -le 488
+RUN: llvm-profdata merge --sample --text %t.output | FileCheck %s --check-prefix=TEST_EXTBINARY2
+TEST_EXTBINARY2: main:184019:0
+TEST_EXTBINARY2-NEXT:  4: 534
+TEST_EXTBINARY2-NEXT:  4.2: 534
+TEST_EXTBINARY2-NEXT:  5: 1075
+TEST_EXTBINARY2-NEXT:  5.1: 1075
+TEST_EXTBINARY2-NEXT:  6: 2080
+TEST_EXTBINARY2-NEXT:  7: 534
+TEST_EXTBINARY2-NEXT:  9: 2064 _Z3bari:1471 _Z3fooi:631
+TEST_EXTBINARY2-NEXT:  10: inline1:1000
+TEST_EXTBINARY2-NEXT:   1: 1000
+TEST_EXTBINARY2-NEXT:  10: inline2:2000
+TEST_EXTBINARY2-NEXT:   1: 2000
+TEST_EXTBINARY2-NEXT: _Z3bari:20301:1437
+TEST_EXTBINARY2-NEXT:  1: 1437
+
+7- ExtBinary form, 2 functions dropped
+RUN: llvm-profdata merge --sample --extbinary --output-size-limit=474  %p/Inputs/sample-profile.proftext -o %t.output
+RUN: test $(stat -c %%s %t.output) -le 474
+RUN: llvm-profdata merge --sample --text %t.output | FileCheck %s --check-prefix=TEST_EXTBINARY3
+TEST_EXTBINARY3: main:184019:0
+TEST_EXTBINARY3-NEXT:  4: 534
+TEST_EXTBINARY3-NEXT:  4.2: 534
+TEST_EXTBINARY3-NEXT:  5: 1075
+TEST_EXTBINARY3-NEXT:  5.1: 1075
+TEST_EXTBINARY3-NEXT:  6: 2080
+TEST_EXTBINARY3-NEXT:  7: 534
+TEST_EXTBINARY3-NEXT:  9: 2064 _Z3bari:1471 _Z3fooi:631
+TEST_EXTBINARY3-NEXT:  10: inline1:1000
+TEST_EXTBINARY3-NEXT:   1: 1000
+TEST_EXTBINARY3-NEXT:  10: inline2:2000
+TEST_EXTBINARY3-NEXT:   1: 2000
+
+8- ExtBinary form, all functions dropped
+RUN: not llvm-profdata merge --sample --extbinary --output-size-limit=400  %p/Inputs/sample-profile.proftext 2>&1 | FileCheck %s --check-prefix=INVALID2
+INVALID2: error: Too much profile data
+

diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 90c9e560d47c7..0b7d9341b611c 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -967,7 +967,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
                    bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile,
                    bool SampleMergeColdContext, bool SampleTrimColdContext,
                    bool SampleColdContextFrameDepth, FailureMode FailMode,
-                   bool DropProfileSymbolList) {
+                   bool DropProfileSymbolList, size_t OutputSizeLimit) {
   using namespace sampleprof;
   SampleProfileMap ProfileMap;
   SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
@@ -1049,6 +1049,13 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
     ProfileIsCS = FunctionSamples::ProfileIsCS = false;
   }
 
+  // If limiting the output size, write to a string buffer first, and drop
+  // functions if the output size exceeds limit. This iterates multiple times
+  // until the limit is satisfied.
+  SmallVector<char> StringBuffer;
+  std::unique_ptr<raw_ostream> BufferStream(
+      new raw_svector_ostream(StringBuffer));
+
   auto WriterOrErr =
       SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
   if (std::error_code EC = WriterOrErr.getError())
@@ -1060,7 +1067,9 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
   auto Buffer = getInputFileBuf(ProfileSymbolListFile);
   handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
                         CompressAllSections, UseMD5, GenPartialProfile);
-  if (std::error_code EC = Writer->write(ProfileMap))
+
+  if (std::error_code EC =
+          Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
     exitWithErrorCode(std::move(EC));
 }
 
@@ -1203,6 +1212,11 @@ static int merge_main(int argc, const char *argv[]) {
       "sample-frame-depth-for-cold-context", cl::init(1),
       cl::desc("Keep the last K frames while merging cold profile. 1 means the "
                "context-less base profile"));
+  cl::opt<size_t> OutputSizeLimit(
+      "output-size-limit", cl::init(0), cl::Hidden,
+      cl::desc("Trim cold functions until profile size is below specified "
+               "limit in bytes. This uses a heuristic and functions may be "
+               "excessively trimmed"));
   cl::opt<bool> GenPartialProfile(
       "gen-partial-profile", cl::init(false), cl::Hidden,
       cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
@@ -1289,7 +1303,8 @@ static int merge_main(int argc, const char *argv[]) {
         WeightedInputs, Remapper.get(), OutputFilename, OutputFormat,
         ProfileSymbolListFile, CompressAllSections, UseMD5, GenPartialProfile,
         GenCSNestedProfile, SampleMergeColdContext, SampleTrimColdContext,
-        SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList);
+        SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList,
+        OutputSizeLimit);
   return 0;
 }
 


        


More information about the llvm-commits mailing list