[llvm] 79971d0 - [llvm-profdata] Add option to cap profile output size

William Huang via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 8 14:22:29 PST 2023


Author: William Huang
Date: 2023-02-08T22:21:33Z
New Revision: 79971d0d771a273eaf13697c3a2e55fc0194bc09

URL: https://github.com/llvm/llvm-project/commit/79971d0d771a273eaf13697c3a2e55fc0194bc09
DIFF: https://github.com/llvm/llvm-project/commit/79971d0d771a273eaf13697c3a2e55fc0194bc09.diff

LOG: [llvm-profdata] Add option to cap profile output size

D139603 (add option to llvm-profdata to reduce output profile size) contains test cases that are not cross-platform. Moving those tests to unit test and making sure the feature is callable from llvm library

Reviewed By: snehasish

Differential Revision: https://reviews.llvm.org/D141446

Added: 
    llvm/unittests/tools/llvm-profdata/CMakeLists.txt
    llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp

Modified: 
    llvm/include/llvm/ProfileData/SampleProf.h
    llvm/include/llvm/ProfileData/SampleProfWriter.h
    llvm/lib/ProfileData/SampleProfWriter.cpp
    llvm/tools/llvm-profdata/llvm-profdata.cpp
    llvm/unittests/tools/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 13f0157222eca..faee9639ea860 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -427,6 +427,14 @@ class SampleRecord {
   void print(raw_ostream &OS, unsigned Indent) const;
   void dump() const;
 
+  /// Two records compare equal when both their sample counts and their
+  /// call-target maps compare equal.
+  bool operator==(const SampleRecord &Other) const {
+    if (NumSamples != Other.NumSamples)
+      return false;
+    return CallTargets == Other.CallTargets;
+  }
+
+  /// Logical negation of operator==.
+  bool operator!=(const SampleRecord &Other) const {
+    return !operator==(Other);
+  }
+
 private:
   uint64_t NumSamples = 0;
   CallTargetMap CallTargets;
@@ -1149,6 +1157,21 @@ class FunctionSamples {
   // all the inline instances and names of call targets.
   void findAllNames(DenseSet<StringRef> &NameSet) const;
 
+  /// Member-wise equality over the name map, function hash, context, sample
+  /// totals, body samples and callsite samples.
+  bool operator==(const FunctionSamples &Other) const {
+    // The name maps match when the two handles compare equal, or when both are
+    // non-null and the maps they refer to have equal contents.
+    const bool SameNameMap =
+        GUIDToFuncNameMap == Other.GUIDToFuncNameMap ||
+        (GUIDToFuncNameMap && Other.GUIDToFuncNameMap &&
+         *GUIDToFuncNameMap == *Other.GUIDToFuncNameMap);
+    if (!SameNameMap)
+      return false;
+
+    return FunctionHash == Other.FunctionHash && Context == Other.Context &&
+           TotalSamples == Other.TotalSamples &&
+           TotalHeadSamples == Other.TotalHeadSamples &&
+           BodySamples == Other.BodySamples &&
+           CallsiteSamples == Other.CallsiteSamples;
+  }
+
+  /// Logical negation of operator==.
+  bool operator!=(const FunctionSamples &Other) const {
+    return !operator==(Other);
+  }
+
 private:
   /// CFG hash value for the function.
   uint64_t FunctionHash = 0;

diff  --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h
index b1ed0335e9c93..4edb3b049e704 100644
--- a/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -35,6 +35,56 @@ enum SectionLayout {
   NumOfLayout,
 };
 
+/// Interface for deciding which functions to drop when a profile must be
+/// shrunk to fit a size limit. A user who wants a different policy than
+/// "drop the functions with the fewest samples first" provides a class
+/// implementing this interface to SampleProfileWriter::writeWithSizeLimit().
+class FunctionPruningStrategy {
+public:
+  /// \p ProfileMap A reference to the original profile map. Erase() modifies
+  /// it.
+  /// \p OutputSizeLimit Size limit in bytes of the output profile, needed to
+  /// estimate how many functions to remove.
+  FunctionPruningStrategy(SampleProfileMap &ProfileMap, size_t OutputSizeLimit)
+      : ProfileMap(ProfileMap), OutputSizeLimit(OutputSizeLimit) {}
+
+  virtual ~FunctionPruningStrategy() = default;
+
+  /// Called by SampleProfileWriter::writeWithSizeLimit() after each write
+  /// iteration whose output still exceeds the limit. The implementation
+  /// erases functions from the profile map so that the next write attempt is
+  /// smaller; it must erase at least one entry.
+  ///
+  /// \p CurrentOutputSize Number of bytes the output takes if the current
+  /// profile map is written.
+  virtual void Erase(size_t CurrentOutputSize) = 0;
+
+protected:
+  SampleProfileMap &ProfileMap;
+  size_t OutputSizeLimit;
+};
+
+/// Default pruning policy: functions with the fewest samples are dropped
+/// first.
+class DefaultFunctionPruningStrategy : public FunctionPruningStrategy {
+public:
+  DefaultFunctionPruningStrategy(SampleProfileMap &ProfileMap,
+                                 size_t OutputSizeLimit);
+
+  /// The exact output size cannot be easily calculated because of
+  /// compression, so a heuristic picks how many functions to remove: enough
+  /// to make progress, but not too many, with the goal of keeping the number
+  /// of write iterations low.
+  /// Empirically, functions with larger total sample count contain linearly
+  /// more sample entries and so take linearly more space to write. With all
+  /// functions sorted by total sample count, the cumulative length is
+  /// therefore quadratic.
+  /// TODO: Find better heuristic.
+  void Erase(size_t CurrentOutputSize) override;
+
+private:
+  /// Function list built by the constructor; Erase() removes entries from its
+  /// tail.
+  std::vector<NameFunctionSamples> SortedFunctions;
+};
+
 /// Sample-based profile writer. Base class.
 class SampleProfileWriter {
 public:
@@ -50,6 +100,17 @@ class SampleProfileWriter {
   /// \returns status code of the file update operation.
   virtual std::error_code write(const SampleProfileMap &ProfileMap);
 
+  /// Write sample profiles without exceeding the given size limit. When the
+  /// output is too large, an instance of \p PruningStrategyT (constructed from
+  /// \p ProfileMap and \p OutputSizeLimit) is asked to drop functions.
+  ///
+  /// \returns status code of the file update operation.
+  template <typename PruningStrategyT = DefaultFunctionPruningStrategy>
+  std::error_code writeWithSizeLimit(SampleProfileMap &ProfileMap,
+                                     size_t OutputSizeLimit) {
+    PruningStrategyT Pruner(ProfileMap, OutputSizeLimit);
+    return writeWithSizeLimitInternal(ProfileMap, OutputSizeLimit, &Pruner);
+  }
+
   raw_ostream &getOutputStream() { return *OutputStream; }
 
   /// Profile writer factory.
@@ -79,6 +140,15 @@ class SampleProfileWriter {
   // Write function profiles to the profile file.
   virtual std::error_code writeFuncProfiles(const SampleProfileMap &ProfileMap);
 
+  /// Non-template implementation behind writeWithSizeLimit(): writes
+  /// \p ProfileMap under \p OutputSizeLimit bytes, calling \p Strategy to
+  /// erase functions whenever an attempt is still too large.
+  std::error_code writeWithSizeLimitInternal(SampleProfileMap &ProfileMap,
+                                             size_t OutputSizeLimit,
+                                             FunctionPruningStrategy *Strategy);
+
+  /// For writeWithSizeLimit in text mode, each newline takes 1 additional byte
+  /// on Windows when actually written to the file, but not when written to a
+  /// memory buffer. This needs to be accounted for when rewriting the profile.
+  /// Initialized to zero so the member never holds an indeterminate value in
+  /// writers that do not reset it themselves.
+  size_t LineCount = 0;
+
   /// Output stream where to emit the profile to.
   std::unique_ptr<raw_ostream> OutputStream;
 
@@ -102,6 +172,7 @@ class SampleProfileWriterText : public SampleProfileWriter {
       : SampleProfileWriter(OS), Indent(0) {}
 
   std::error_code writeHeader(const SampleProfileMap &ProfileMap) override {
+    LineCount = 0;
     return sampleprof_error::success;
   }
 

diff  --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 093790afe2d66..bce858a99a819 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <cmath>
 #include <cstdint>
 #include <memory>
 #include <set>
@@ -37,9 +38,109 @@
 #include <utility>
 #include <vector>
 
+#define DEBUG_TYPE "llvm-profdata"
+
 using namespace llvm;
 using namespace sampleprof;
 
+namespace llvm {
+namespace support {
+namespace endian {
+namespace {
+
+// Adapter around llvm::support::endian::Writer that writes a value at a given
+// offset through raw_pwrite_stream::pwrite().
+struct SeekableWriter {
+  raw_pwrite_stream &OS;
+  endianness Endian;
+  SeekableWriter(raw_pwrite_stream &OS, endianness Endian)
+      : OS(OS), Endian(Endian) {}
+
+  // Encodes Val with the configured endianness into a temporary string, then
+  // writes those bytes to the stream at Offset.
+  template <typename ValueType>
+  void pwrite(ValueType Val, size_t Offset) {
+    std::string Encoded;
+    raw_string_ostream Encoder(Encoded);
+    Writer EndianWriter(Encoder, Endian);
+    EndianWriter.write(Val);
+    OS.pwrite(Encoded.data(), Encoded.size(), Offset);
+  }
+};
+
+} // namespace
+} // namespace endian
+} // namespace support
+} // namespace llvm
+
+/// Stores the map reference and size limit in the base class, then fills
+/// SortedFunctions from \p ProfileMap via sortFuncProfiles().
+DefaultFunctionPruningStrategy::DefaultFunctionPruningStrategy(
+    SampleProfileMap &ProfileMap, size_t OutputSizeLimit)
+    : FunctionPruningStrategy(ProfileMap, OutputSizeLimit) {
+  sortFuncProfiles(ProfileMap, SortedFunctions);
+}
+
+/// Removes functions from the tail of SortedFunctions, and the matching
+/// entries from ProfileMap, so that the next write attempt is smaller.
+///
+/// The target function count is the current count scaled by
+/// (OutputSizeLimit / CurrentOutputSize)^2; at least one function is always
+/// removed while any remain.
+///
+/// \p CurrentOutputSize Number of bytes the current profile map takes when
+/// written.
+void DefaultFunctionPruningStrategy::Erase(size_t CurrentOutputSize) {
+  // Nothing left to drop. This also avoids forming an iterator before begin().
+  if (SortedFunctions.empty())
+    return;
+
+  const size_t OldCount = ProfileMap.size();
+  size_t NumToRemove = 1;
+  // Apply the quadratic estimate only when the output really is over the
+  // limit. That rules out a division by zero and keeps the subtraction below
+  // from wrapping around.
+  if (CurrentOutputSize > OutputSizeLimit) {
+    const double Ratio = static_cast<double>(OutputSizeLimit) /
+                         static_cast<double>(CurrentOutputSize);
+    const size_t NewSize = static_cast<size_t>(
+        std::round(static_cast<double>(OldCount) * Ratio * Ratio));
+    if (NewSize < OldCount)
+      NumToRemove = OldCount - NewSize;
+  }
+
+  assert(NumToRemove <= SortedFunctions.size());
+  // Asserts are compiled out in release builds, so clamp as well.
+  NumToRemove = std::min(NumToRemove, SortedFunctions.size());
+
+  const auto FirstDropped =
+      SortedFunctions.begin() + (SortedFunctions.size() - NumToRemove);
+  for (auto It = FirstDropped, End = SortedFunctions.end(); It != End; ++It)
+    ProfileMap.erase(It->first);
+  SortedFunctions.erase(FirstDropped, SortedFunctions.end());
+}
+
+/// Writes \p ProfileMap to the output stream, asking \p Strategy to erase
+/// functions until the serialized profile fits within \p OutputSizeLimit
+/// bytes. A limit of 0 forwards directly to write().
+///
+/// Each attempt is serialized into an in-memory buffer; only an attempt that
+/// fits is copied to the real output stream. The writer's original output
+/// stream is put back on every exit path, including failures.
+///
+/// \returns the error from write() if serialization fails,
+/// sampleprof_error::too_large if the profile map is empty once the loop
+/// ends, and sampleprof_error::success otherwise.
+std::error_code SampleProfileWriter::writeWithSizeLimitInternal(
+    SampleProfileMap &ProfileMap, size_t OutputSizeLimit,
+    FunctionPruningStrategy *Strategy) {
+  if (OutputSizeLimit == 0)
+    return write(ProfileMap);
+
+  size_t OriginalFunctionCount = ProfileMap.size();
+
+  // Park the real stream while attempts are written to memory.
+  std::unique_ptr<raw_ostream> OriginalOutputStream;
+  OutputStream.swap(OriginalOutputStream);
+
+  size_t IterationCount = 0;
+  std::error_code WriteError;
+
+  SmallVector<char> StringBuffer;
+  do {
+    StringBuffer.clear();
+    OutputStream.reset(new raw_svector_ostream(StringBuffer));
+    WriteError = write(ProfileMap);
+    if (WriteError)
+      break;
+
+    size_t TotalSize = StringBuffer.size();
+    // On Windows every "\n" is actually written as "\r\n" to disk but not to
+    // memory buffer, this difference should be added when considering the total
+    // output size.
+#ifdef _WIN32
+    if (Format == SPF_Text)
+      TotalSize += LineCount;
+#endif
+    if (TotalSize <= OutputSizeLimit)
+      break;
+
+    Strategy->Erase(TotalSize);
+    IterationCount++;
+  } while (ProfileMap.size() != 0);
+
+  // Restore the real stream and drop the in-memory one before any return, so
+  // the writer is never left holding a stream bound to the local StringBuffer.
+  OutputStream.swap(OriginalOutputStream);
+  OriginalOutputStream.reset();
+
+  if (WriteError)
+    return WriteError;
+
+  if (ProfileMap.size() == 0)
+    return sampleprof_error::too_large;
+
+  OutputStream->write(StringBuffer.data(), StringBuffer.size());
+  LLVM_DEBUG(dbgs() << "Profile originally has " << OriginalFunctionCount
+                    << " functions, reduced to " << ProfileMap.size() << " in "
+                    << IterationCount << " iterations\n");
+  // Silence warning on Release build.
+  (void)OriginalFunctionCount;
+  (void)IterationCount;
+  return sampleprof_error::success;
+}
+
 std::error_code
 SampleProfileWriter::writeFuncProfiles(const SampleProfileMap &ProfileMap) {
   std::vector<NameFunctionSamples> V;
@@ -116,6 +217,12 @@ std::error_code SampleProfileWriterExtBinaryBase::addNewSection(
 
 std::error_code
 SampleProfileWriterExtBinaryBase::write(const SampleProfileMap &ProfileMap) {
+  // When calling write on a different profile map, existing states should be
+  // cleared.
+  NameTable.clear();
+  CSNameTable.clear();
+  SecHdrTable.clear();
+
   if (std::error_code EC = writeHeader(ProfileMap))
     return EC;
 
@@ -477,6 +584,7 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
   if (Indent == 0)
     OS << ":" << S.getHeadSamples();
   OS << "\n";
+  LineCount++;
 
   SampleSorter<LineLocation, SampleRecord> SortedSamples(S.getBodySamples());
   for (const auto &I : SortedSamples.get()) {
@@ -493,6 +601,7 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
     for (const auto &J : Sample.getSortedCallTargets())
       OS << " " << J.first << ":" << J.second;
     OS << "\n";
+    LineCount++;
   }
 
   SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
@@ -515,11 +624,13 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
   if (FunctionSamples::ProfileIsProbeBased) {
     OS.indent(Indent + 1);
     OS << "!CFGChecksum: " << S.getFunctionHash() << "\n";
+    LineCount++;
   }
 
   if (S.getContext().getAllAttributes()) {
     OS.indent(Indent + 1);
     OS << "!Attributes: " << S.getContext().getAllAttributes() << "\n";
+    LineCount++;
   }
 
   return sampleprof_error::success;
@@ -605,14 +716,10 @@ std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() {
   auto &OS = *OutputStream;
 
   // Fill the slot remembered by TableOffset with the offset of FuncOffsetTable.
-  auto &OFS = static_cast<raw_fd_ostream &>(OS);
   uint64_t FuncOffsetTableStart = OS.tell();
-  if (OFS.seek(TableOffset) == (uint64_t)-1)
-    return sampleprof_error::ostream_seek_unsupported;
-  support::endian::Writer Writer(*OutputStream, support::little);
-  Writer.write(FuncOffsetTableStart);
-  if (OFS.seek(FuncOffsetTableStart) == (uint64_t)-1)
-    return sampleprof_error::ostream_seek_unsupported;
+  support::endian::SeekableWriter Writer(static_cast<raw_pwrite_stream &>(OS),
+                                         support::little);
+  Writer.pwrite(FuncOffsetTableStart, TableOffset);
 
   // Write out the table size.
   encodeULEB128(FuncOffsetTable.size(), OS);
@@ -623,6 +730,7 @@ std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() {
       return EC;
     encodeULEB128(Entry.second, OS);
   }
+  FuncOffsetTable.clear();
   return sampleprof_error::success;
 }
 
@@ -650,6 +758,10 @@ SampleProfileWriterBinary::writeMagicIdent(SampleProfileFormat Format) {
 
 std::error_code
 SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) {
+  // When calling write on a different profile map, existing names should be
+  // cleared.
+  NameTable.clear();
+
   writeMagicIdent(Format);
 
   computeSummary(ProfileMap);
@@ -690,14 +802,6 @@ void SampleProfileWriterExtBinaryBase::allocSecHdrTable() {
 }
 
 std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
-  auto &OFS = static_cast<raw_fd_ostream &>(*OutputStream);
-  uint64_t Saved = OutputStream->tell();
-
-  // Set OutputStream to the location saved in SecHdrTableOffset.
-  if (OFS.seek(SecHdrTableOffset) == (uint64_t)-1)
-    return sampleprof_error::ostream_seek_unsupported;
-  support::endian::Writer Writer(*OutputStream, support::little);
-
   assert(SecHdrTable.size() == SectionHdrLayout.size() &&
          "SecHdrTable entries doesn't match SectionHdrLayout");
   SmallVector<uint32_t, 16> IndexMap(SecHdrTable.size(), -1);
@@ -714,21 +818,23 @@ std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
   // needs to be computed after SecLBRProfile (the order in SecHdrTable),
   // but it needs to be read before SecLBRProfile (the order in
   // SectionHdrLayout). So we use IndexMap above to switch the order.
+  support::endian::SeekableWriter Writer(
+      static_cast<raw_pwrite_stream &>(*OutputStream), support::little);
   for (uint32_t LayoutIdx = 0; LayoutIdx < SectionHdrLayout.size();
        LayoutIdx++) {
     assert(IndexMap[LayoutIdx] < SecHdrTable.size() &&
            "Incorrect LayoutIdx in SecHdrTable");
     auto Entry = SecHdrTable[IndexMap[LayoutIdx]];
-    Writer.write(static_cast<uint64_t>(Entry.Type));
-    Writer.write(static_cast<uint64_t>(Entry.Flags));
-    Writer.write(static_cast<uint64_t>(Entry.Offset));
-    Writer.write(static_cast<uint64_t>(Entry.Size));
+    Writer.pwrite(static_cast<uint64_t>(Entry.Type),
+                  SecHdrTableOffset + 4 * LayoutIdx * sizeof(uint64_t));
+    Writer.pwrite(static_cast<uint64_t>(Entry.Flags),
+                  SecHdrTableOffset + (4 * LayoutIdx + 1) * sizeof(uint64_t));
+    Writer.pwrite(static_cast<uint64_t>(Entry.Offset),
+                  SecHdrTableOffset + (4 * LayoutIdx + 2) * sizeof(uint64_t));
+    Writer.pwrite(static_cast<uint64_t>(Entry.Size),
+                  SecHdrTableOffset + (4 * LayoutIdx + 3) * sizeof(uint64_t));
   }
 
-  // Reset OutputStream.
-  if (OFS.seek(Saved) == (uint64_t)-1)
-    return sampleprof_error::ostream_seek_unsupported;
-
   return sampleprof_error::success;
 }
 

diff  --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 72ca0891c7402..8ebbac5021be5 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -970,7 +970,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
                    bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile,
                    bool SampleMergeColdContext, bool SampleTrimColdContext,
                    bool SampleColdContextFrameDepth, FailureMode FailMode,
-                   bool DropProfileSymbolList) {
+                   bool DropProfileSymbolList, size_t OutputSizeLimit) {
   using namespace sampleprof;
   SampleProfileMap ProfileMap;
   SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
@@ -1064,7 +1064,10 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
   auto Buffer = getInputFileBuf(ProfileSymbolListFile);
   handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
                         CompressAllSections, UseMD5, GenPartialProfile);
-  if (std::error_code EC = Writer->write(ProfileMap))
+
+  // If OutputSizeLimit is 0 (default), it is the same as write().
+  if (std::error_code EC =
+          Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
     exitWithErrorCode(std::move(EC));
 }
 
@@ -1207,6 +1210,11 @@ static int merge_main(int argc, const char *argv[]) {
       "sample-frame-depth-for-cold-context", cl::init(1),
       cl::desc("Keep the last K frames while merging cold profile. 1 means the "
                "context-less base profile"));
+  // Size cap for the merged sample profile; 0 (the default) makes the writer
+  // behave like a plain write().
+  cl::opt<size_t> OutputSizeLimit(
+      "output-size-limit", cl::init(0), cl::Hidden,
+      cl::desc("Trim cold functions until profile size is below specified "
+               "limit in bytes. This uses a heuristic and functions may be "
+               "excessively trimmed"));
   cl::opt<bool> GenPartialProfile(
       "gen-partial-profile", cl::init(false), cl::Hidden,
       cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
@@ -1293,7 +1301,8 @@ static int merge_main(int argc, const char *argv[]) {
         WeightedInputs, Remapper.get(), OutputFilename, OutputFormat,
         ProfileSymbolListFile, CompressAllSections, UseMD5, GenPartialProfile,
         GenCSNestedProfile, SampleMergeColdContext, SampleTrimColdContext,
-        SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList);
+        SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList,
+        OutputSizeLimit);
   return 0;
 }
 

diff  --git a/llvm/unittests/tools/CMakeLists.txt b/llvm/unittests/tools/CMakeLists.txt
index 7ef64f1180a00..e032113fa77e6 100644
--- a/llvm/unittests/tools/CMakeLists.txt
+++ b/llvm/unittests/tools/CMakeLists.txt
@@ -7,5 +7,6 @@ endif()
 add_subdirectory(
   llvm-exegesis
 )
+add_subdirectory(llvm-profdata)
 add_subdirectory(llvm-profgen)
 add_subdirectory(llvm-mca)

diff  --git a/llvm/unittests/tools/llvm-profdata/CMakeLists.txt b/llvm/unittests/tools/llvm-profdata/CMakeLists.txt
new file mode 100644
index 0000000000000..dab1ac523ed00
--- /dev/null
+++ b/llvm/unittests/tools/llvm-profdata/CMakeLists.txt
@@ -0,0 +1,12 @@
+set(LLVM_LINK_COMPONENTS
+  ProfileData
+  Support
+  )
+
+add_llvm_unittest(LLVMProfdataTests
+    OutputSizeLimitTest.cpp
+  )
+
+target_link_libraries(LLVMProfdataTests PRIVATE LLVMTestingSupport)
+
+set_property(TARGET LLVMProfdataTests PROPERTY FOLDER "Tests/UnitTests/ToolTests")

diff  --git a/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp b/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
new file mode 100644
index 0000000000000..c4f8248358969
--- /dev/null
+++ b/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
@@ -0,0 +1,182 @@
+//===- llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/ProfileData/SampleProfWriter.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using llvm::unittest::TempFile;
+
+std::string Input1 = R"(main:184019:0
+ 4: 534
+ 4.2: 534
+ 5: 1075
+ 5.1: 1075
+ 6: 2080
+ 7: 534
+ 9: 2064 _Z3bari:1471 _Z3fooi:631
+ 10: inline1:1000
+  1: 1000
+ 10: inline2:2000
+  1: 2000
+_Z3bari:20301:1437
+ 1: 1437
+_Z3fooi:7711:610
+ 1: 610)";
+
+const char EmptyProfile[18] = "\xff\xe5\xd0\xb1\xf4\xc9\x94\xa8\x53\x67";
+
+/// sys::fs and SampleProf mix Error and error_code, making an adapter class
+/// to keep code elegant.
+template <typename T> class ExpectedErrorOr : public Expected<T> {
+public:
+  ExpectedErrorOr(T &&Obj) : Expected<T>(Obj) {}
+
+  ExpectedErrorOr(std::error_code EC) : Expected<T>(errorCodeToError(EC)) {}
+
+  ExpectedErrorOr(Error &&E) : Expected<T>(std::move(E)) {}
+
+  template <typename U>
+  ExpectedErrorOr(ErrorOr<U> &&E)
+      : Expected<T>(errorCodeToError(E.getError())) {}
+
+  template <typename U>
+  ExpectedErrorOr(Expected<U> &&E) : Expected<T>(E.takeError()) {}
+};
+
+/// Evaluates Value into a new local named Var##OrErr. If that tests false, it
+/// is returned from the enclosing function; otherwise a new local Var is
+/// initialized by moving from Var##OrErr.get(). Expands to several statements,
+/// so it must not be used as the body of an unbraced if/else/loop.
+#define DEF_VAR_RETURN_IF_ERROR(Var, Value)                                    \
+  auto Var##OrErr = Value;                                                     \
+  if (!Var##OrErr)                                                             \
+    return Var##OrErr;                                                         \
+  auto Var = std::move(Var##OrErr.get())
+
+/// Same as DEF_VAR_RETURN_IF_ERROR, but assigns to an already declared
+/// Var##OrErr and Var instead of declaring them.
+#define VAR_RETURN_IF_ERROR(Var, Value)                                        \
+  Var##OrErr = Value;                                                          \
+  if (!Var##OrErr)                                                             \
+    return Var##OrErr;                                                         \
+  Var = std::move(Var##OrErr.get())
+
+/// Evaluates Value once and returns it from the enclosing function when it
+/// converts to true. Expands to a bare `if`, so a following `else` would bind
+/// to it; keep uses as standalone statements.
+#define RETURN_IF_ERROR(Value)                                                 \
+  if (auto E = Value)                                                          \
+  return std::move(E)
+
+/// The main testing routine. After rewriting profiles with size limit, check
+/// the following:
+/// 1. The file size of the new profile is within the size limit.
+/// 2. The new profile is a subset of the old profile, and the content of every
+/// sample in the new profile is unchanged.
+/// Note that even though by default samples with fewest total count are dropped
+/// first, this is not a requirement. Samples can be dropped by any order.
+///
+/// \p Input Contents of the profile to read.
+/// \p SizeLimit Size limit in bytes passed to writeWithSizeLimit().
+/// \p Format Output format used for the rewritten profile.
+/// \returns nullptr on success, or the first unexpected error encountered.
+static ExpectedErrorOr<void *> RunTest(StringRef Input, size_t SizeLimit,
+                                       SampleProfileFormat Format) {
+  // Read Input profile.
+  auto FS = vfs::getRealFileSystem();
+  LLVMContext Context;
+  auto InputBuffer = MemoryBuffer::getMemBuffer(Input);
+  DEF_VAR_RETURN_IF_ERROR(
+      Reader, SampleProfileReader::create(InputBuffer, Context, *FS));
+  RETURN_IF_ERROR(Reader->read());
+  SampleProfileMap OldProfiles = Reader->getProfiles();
+
+  // Rewrite it to a temp file with size limit.
+  TempFile Temp("profile", "afdo", "", true);
+  bool isEmpty = false;
+  {
+    DEF_VAR_RETURN_IF_ERROR(Writer,
+                            SampleProfileWriter::create(Temp.path(), Format));
+    std::error_code EC = Writer->writeWithSizeLimit(OldProfiles, SizeLimit);
+    // too_large means no sample could be written because SizeLimit is too
+    // small. Otherwise any other error code indicates unexpected failure.
+    if (EC == sampleprof_error::too_large)
+      isEmpty = true;
+    else if (EC)
+      return EC;
+  }
+
+  // Read the temp file to get new profiles. Use the default empty profile if
+  // temp file was not written because size limit is too small.
+  SampleProfileMap NewProfiles;
+  InputBuffer = MemoryBuffer::getMemBuffer(StringRef(EmptyProfile, 17));
+  DEF_VAR_RETURN_IF_ERROR(
+      NewReader, SampleProfileReader::create(InputBuffer, Context, *FS));
+  if (!isEmpty) {
+    VAR_RETURN_IF_ERROR(NewReader, SampleProfileReader::create(
+                                       Temp.path().str(), Context, *FS));
+    RETURN_IF_ERROR(NewReader->read());
+    NewProfiles = NewReader->getProfiles();
+  }
+
+  // Check temp file is actually within size limit.
+  uint64_t FileSize;
+  RETURN_IF_ERROR(sys::fs::file_size(Temp.path(), FileSize));
+  EXPECT_LE(FileSize, SizeLimit);
+
+  // For compact binary format, function names are stored as MD5, so we cannot
+  // directly match the samples of the new profile with the old profile. A
+  // simple way is to convert the old profile to compact binary format and read
+  // it back.
+  if (Format == llvm::sampleprof::SPF_Compact_Binary) {
+    TempFile CompBinary("compbinary", "afdo", "", true);
+    {
+      DEF_VAR_RETURN_IF_ERROR(
+          Writer, SampleProfileWriter::create(
+                      CompBinary.path(), llvm::sampleprof::SPF_Compact_Binary));
+      RETURN_IF_ERROR(Writer->write(OldProfiles));
+    }
+    VAR_RETURN_IF_ERROR(Reader, SampleProfileReader::create(
+                                    CompBinary.path().str(), Context, *FS));
+    RETURN_IF_ERROR(Reader->read());
+    OldProfiles = Reader->getProfiles();
+  }
+
+  // For every sample in the new profile, confirm it is in the old profile and
+  // unchanged. Bind by const reference so each entry is not copied.
+  for (const auto &Sample : NewProfiles) {
+    auto FindResult = OldProfiles.find(Sample.first);
+    EXPECT_NE(FindResult, OldProfiles.end());
+    if (FindResult != OldProfiles.end()) {
+      EXPECT_EQ(Sample.second.getHeadSamples(),
+                FindResult->second.getHeadSamples());
+      EXPECT_EQ(Sample.second, FindResult->second);
+    }
+  }
+  return nullptr;
+}
+
+// Extensible binary format: RunTest must succeed for every size limit listed.
+TEST(TestOutputSizeLimit, TestOutputSizeLimitExtBinary) {
+  for (size_t Limit : {490, 489, 488, 475, 474, 459, 400}) {
+    ASSERT_THAT_EXPECTED(
+        RunTest(Input1, Limit, llvm::sampleprof::SPF_Ext_Binary), Succeeded());
+  }
+}
+
+// Raw binary format: RunTest must succeed for every size limit listed.
+TEST(TestOutputSizeLimit, TestOutputSizeLimitBinary) {
+  for (size_t Limit : {250, 249, 248, 237, 236, 223, 200}) {
+    ASSERT_THAT_EXPECTED(RunTest(Input1, Limit, llvm::sampleprof::SPF_Binary),
+                         Succeeded());
+  }
+}
+
+// Compact binary format: RunTest must succeed for every size limit listed.
+TEST(TestOutputSizeLimit, TestOutputSizeLimitCompBinary) {
+  for (size_t Limit : {277, 276, 275, 264, 263, 250, 200}) {
+    ASSERT_THAT_EXPECTED(
+        RunTest(Input1, Limit, llvm::sampleprof::SPF_Compact_Binary),
+        Succeeded());
+  }
+}
+
+// Text format: RunTest must succeed for every size limit listed.
+TEST(TestOutputSizeLimit, TestOutputSizeLimitText) {
+  for (size_t Limit :
+       {229, 228, 227, 213, 212, 211, 189, 188, 187, 186, 150}) {
+    ASSERT_THAT_EXPECTED(RunTest(Input1, Limit, llvm::sampleprof::SPF_Text),
+                         Succeeded());
+  }
+}


        


More information about the llvm-commits mailing list