[llvm] ebad678 - [SampleFDO] Port MD5 name table support to extbinary format.

Wei Mi via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 30 22:07:45 PDT 2020


Author: Wei Mi
Date: 2020-03-30T22:07:08-07:00
New Revision: ebad678857a94c32ce7b6931e9c642b32d278b67

URL: https://github.com/llvm/llvm-project/commit/ebad678857a94c32ce7b6931e9c642b32d278b67
DIFF: https://github.com/llvm/llvm-project/commit/ebad678857a94c32ce7b6931e9c642b32d278b67.diff

LOG: [SampleFDO] Port MD5 name table support to extbinary format.

Compbinary format uses MD5 to represent strings in name table. That gives smaller profile without the need of compression/decompression when writing/reading the profile. The patch adds the support in extbinary format. It is off by default but user can choose to enable it.

Note the feature of using MD5 in name table can bring very small chance of name conflict leading to profile mismatch. Besides, profile using the feature won't have the profile remapping support.

Differential Revision: https://reviews.llvm.org/D76255

Added: 
    llvm/test/Transforms/SampleProfile/Inputs/inline.md5extbinary.afdo

Modified: 
    llvm/include/llvm/ProfileData/SampleProf.h
    llvm/include/llvm/ProfileData/SampleProfReader.h
    llvm/include/llvm/ProfileData/SampleProfWriter.h
    llvm/lib/ProfileData/SampleProf.cpp
    llvm/lib/ProfileData/SampleProfReader.cpp
    llvm/lib/ProfileData/SampleProfWriter.cpp
    llvm/lib/Transforms/IPO/SampleProfile.cpp
    llvm/test/Transforms/SampleProfile/profile-format.ll
    llvm/test/tools/llvm-profdata/roundtrip.test
    llvm/tools/llvm-profdata/llvm-profdata.cpp
    llvm/unittests/ProfileData/SampleProfTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 4019fe2549be..7b1086c74919 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -101,14 +101,14 @@ static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) {
          uint64_t('2') << (64 - 56) | uint64_t(Format);
 }
 
-// Get the proper representation of a string in the input Format.
-static inline StringRef getRepInFormat(StringRef Name,
-                                       SampleProfileFormat Format,
+/// Get the proper representation of a string according to whether the
+/// current Format uses MD5 to represent the string.
+static inline StringRef getRepInFormat(StringRef Name, bool UseMD5,
                                        std::string &GUIDBuf) {
   if (Name.empty())
     return Name;
   GUIDBuf = std::to_string(Function::getGUID(Name));
-  return (Format == SPF_Compact_Binary) ? StringRef(GUIDBuf) : Name;
+  return UseMD5 ? StringRef(GUIDBuf) : Name;
 }
 
 static inline uint64_t SPVersion() { return 103; }
@@ -154,18 +154,64 @@ struct SecHdrTableEntry {
   uint64_t Size;
 };
 
-enum SecFlags { SecFlagInValid = 0, SecFlagCompress = (1 << 0) };
+// Flags common for all sections are defined here. In SecHdrTableEntry::Flags,
+// common flags will be saved in the lower 32bits and section specific flags
+// will be saved in the higher 32 bits.
+enum class SecCommonFlags : uint32_t {
+  SecFlagInValid = 0,
+  SecFlagCompress = (1 << 0)
+};
+
+// Section specific flags are defined here.
+// !!!Note: Everytime a new enum class is created here, please add
+// a new check in verifySecFlag.
+enum class SecNameTableFlags : uint32_t {
+  SecFlagInValid = 0,
+  SecFlagMD5Name = (1 << 0)
+};
+
+// Verify section specific flag is used for the correct section.
+template <class SecFlagType>
+static inline void verifySecFlag(SecType Type, SecFlagType Flag) {
+  // No verification is needed for common flags.
+  if (std::is_same<SecCommonFlags, SecFlagType>())
+    return;
+
+  // Verification starts here for section specific flag.
+  bool IsFlagLegal = false;
+  switch (Type) {
+  case SecNameTable:
+    IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>();
+    break;
+  default:
+    break;
+  }
+  if (!IsFlagLegal)
+    llvm_unreachable("Misuse of a flag in an incompatible section");
+}
 
-static inline void addSecFlags(SecHdrTableEntry &Entry, uint64_t Flags) {
-  Entry.Flags |= Flags;
+template <class SecFlagType>
+static inline void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) {
+  verifySecFlag(Entry.Type, Flag);
+  auto FVal = static_cast<uint64_t>(Flag);
+  bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
+  Entry.Flags |= IsCommon ? FVal : (FVal << 32);
 }
 
-static inline void removeSecFlags(SecHdrTableEntry &Entry, uint64_t Flags) {
-  Entry.Flags &= ~Flags;
+template <class SecFlagType>
+static inline void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) {
+  verifySecFlag(Entry.Type, Flag);
+  auto FVal = static_cast<uint64_t>(Flag);
+  bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
+  Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32));
 }
 
-static inline bool hasSecFlag(SecHdrTableEntry &Entry, SecFlags Flag) {
-  return Entry.Flags & Flag;
+template <class SecFlagType>
+static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag) {
+  verifySecFlag(Entry.Type, Flag);
+  auto FVal = static_cast<uint64_t>(Flag);
+  bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
+  return Entry.Flags & (IsCommon ? FVal : (FVal << 32));
 }
 
 /// Represents the relative location of an instruction.
@@ -379,7 +425,7 @@ class FunctionSamples {
   const FunctionSamples *findFunctionSamplesAt(const LineLocation &Loc,
                                                StringRef CalleeName) const {
     std::string CalleeGUID;
-    CalleeName = getRepInFormat(CalleeName, Format, CalleeGUID);
+    CalleeName = getRepInFormat(CalleeName, UseMD5, CalleeGUID);
 
     auto iter = CallsiteSamples.find(Loc);
     if (iter == CallsiteSamples.end())
@@ -527,13 +573,13 @@ class FunctionSamples {
   }
 
   /// Translate \p Name into its original name in Module.
-  /// When the Format is not SPF_Compact_Binary, \p Name needs no translation.
-  /// When the Format is SPF_Compact_Binary, \p Name in current FunctionSamples
+  /// When profile doesn't use MD5, \p Name needs no translation.
+  /// When profile uses MD5, \p Name in current FunctionSamples
   /// is actually GUID of the original function name. getNameInModule will
   /// translate \p Name in current FunctionSamples into its original name.
   /// If the original name doesn't exist in \p M, return empty StringRef.
   StringRef getNameInModule(StringRef Name, const Module *M) const {
-    if (Format != SPF_Compact_Binary)
+    if (!UseMD5)
       return Name;
 
     assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be popluated first");
@@ -560,16 +606,18 @@ class FunctionSamples {
 
   static SampleProfileFormat Format;
 
+  /// Whether the profile uses MD5 to represent string.
+  static bool UseMD5;
+
   /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
   /// all the function symbols defined or declared in current module.
   DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr;
 
   // Assume the input \p Name is a name coming from FunctionSamples itself.
-  // If the format is SPF_Compact_Binary, the name is already a GUID and we
+  // If UseMD5 is true, the name is already a GUID and we
   // don't want to return the GUID of GUID.
   static uint64_t getGUID(StringRef Name) {
-    return (Format == SPF_Compact_Binary) ? std::stoull(Name.data())
-                                          : Function::getGUID(Name);
+    return UseMD5 ? std::stoull(Name.data()) : Function::getGUID(Name);
   }
 
 private:

diff  --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 72b178edc260..23a2519354c8 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -335,6 +335,7 @@ class SampleProfileReader {
       return EC;
     if (Remapper)
       Remapper->applyRemapping(Ctx);
+    FunctionSamples::UseMD5 = useMD5();
     return sampleprof_error::success;
   }
 
@@ -363,7 +364,7 @@ class SampleProfileReader {
   FunctionSamples *getOrCreateSamplesFor(const Function &F) {
     std::string FGUID;
     StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
-    CanonName = getRepInFormat(CanonName, getFormat(), FGUID);
+    CanonName = getRepInFormat(CanonName, useMD5(), FGUID);
     return &Profiles[CanonName];
   }
 
@@ -374,7 +375,7 @@ class SampleProfileReader {
         return FS;
     }
     std::string FGUID;
-    Fname = getRepInFormat(Fname, getFormat(), FGUID);
+    Fname = getRepInFormat(Fname, useMD5(), FGUID);
     auto It = Profiles.find(Fname);
     if (It != Profiles.end())
       return &It->second;
@@ -419,6 +420,9 @@ class SampleProfileReader {
   virtual std::vector<StringRef> *getNameTable() { return nullptr; }
   virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) { return false; };
 
+  /// Return whether names in the profile are all MD5 numbers.
+  virtual bool useMD5() { return false; }
+
 protected:
   /// Map every function to its associated profile.
   ///
@@ -590,7 +594,7 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   virtual std::error_code readHeader() override;
   virtual std::error_code verifySPMagic(uint64_t Magic) override = 0;
   virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size,
-                                         SecType Type) = 0;
+                                         const SecHdrTableEntry &Entry) = 0;
 
 public:
   SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B,
@@ -610,11 +614,14 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
 class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {
 private:
   virtual std::error_code verifySPMagic(uint64_t Magic) override;
-  virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size,
-                                         SecType Type) override;
+  virtual std::error_code
+  readOneSection(const uint8_t *Start, uint64_t Size,
+                 const SecHdrTableEntry &Entry) override;
   std::error_code readProfileSymbolList();
   std::error_code readFuncOffsetTable();
   std::error_code readFuncProfiles();
+  std::error_code readMD5NameTable();
+  std::error_code readNameTableSec(bool IsMD5);
 
   /// The table mapping from function name to the offset of its FunctionSample
   /// towards file start.
@@ -624,6 +631,15 @@ class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {
   /// Use all functions from the input profile.
   bool UseAllFuncs = true;
 
+  /// If MD5 is used in NameTable section, the section saves uint64_t data.
+  /// The uint64_t data has to be converted to a string and then the string
+  /// will be used to initialize StringRef in NameTable.
+  /// Note NameTable contains StringRef so it needs another buffer to own
+  /// the string data. MD5StringBuf serves as the string buffer that is
+  /// referenced by NameTable (vector of StringRef). We make sure
+  /// the lifetime of MD5StringBuf is not shorter than that of NameTable.
+  std::unique_ptr<std::vector<std::string>> MD5StringBuf;
+
 public:
   SampleProfileReaderExtBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
                                SampleProfileFormat Format = SPF_Ext_Binary)
@@ -638,6 +654,12 @@ class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {
 
   /// Collect functions with definitions in Module \p M.
   void collectFuncsFrom(const Module &M) override;
+
+  /// Return whether names in the profile are all MD5 numbers.
+  virtual bool useMD5() {
+    assert(!NameTable.empty() && "NameTable should have been initialized");
+    return MD5StringBuf && !MD5StringBuf->empty();
+  }
 };
 
 class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary {
@@ -671,6 +693,9 @@ class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary {
 
   /// Collect functions to be used when compiling Module \p M.
   void collectFuncsFrom(const Module &M) override;
+
+  /// Return whether names in the profile are all MD5 numbers.
+  virtual bool useMD5() { return true; }
 };
 
 using InlineCallStack = SmallVector<FunctionSamples *, 10>;

diff  --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h
index 5814f69fdcab..54500f513ad1 100644
--- a/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -57,6 +57,8 @@ class SampleProfileWriter {
   create(std::unique_ptr<raw_ostream> &OS, SampleProfileFormat Format);
 
   virtual void setProfileSymbolList(ProfileSymbolList *PSL) {}
+  virtual void setToCompressAllSections() {}
+  virtual void setUseMD5() {}
 
 protected:
   SampleProfileWriter(std::unique_ptr<raw_ostream> &OS)
@@ -147,12 +149,20 @@ class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary {
   virtual std::error_code
   write(const StringMap<FunctionSamples> &ProfileMap) override;
 
-  void setToCompressAllSections();
+  virtual void setToCompressAllSections() override;
   void setToCompressSection(SecType Type);
 
 protected:
   uint64_t markSectionStart(SecType Type);
   std::error_code addNewSection(SecType Sec, uint64_t SectionStart);
+  template <class SecFlagType>
+  void addSectionFlag(SecType Type, SecFlagType Flag) {
+    for (auto &Entry : SectionHdrLayout) {
+      if (Entry.Type == Type)
+        addSecFlag(Entry, Flag);
+    }
+  }
+
   virtual void initSectionHdrLayout() = 0;
   virtual std::error_code
   writeSections(const StringMap<FunctionSamples> &ProfileMap) = 0;
@@ -168,7 +178,6 @@ class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary {
   std::error_code writeSecHdrTable();
   virtual std::error_code
   writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
-  void addSectionFlags(SecType Type, SecFlags Flags);
   SecHdrTableEntry &getEntryInLayout(SecType Type);
   std::error_code compressAndOutput();
 
@@ -202,6 +211,12 @@ class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase {
     ProfSymList = PSL;
   };
 
+  // Set to use MD5 to represent string in NameTable.
+  virtual void setUseMD5() override {
+    UseMD5 = true;
+    addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagMD5Name);
+  }
+
 private:
   virtual void initSectionHdrLayout() override {
     // Note that SecFuncOffsetTable section is written after SecLBRProfile
@@ -222,6 +237,10 @@ class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase {
   };
   virtual std::error_code
   writeSections(const StringMap<FunctionSamples> &ProfileMap) override;
+
+  std::error_code writeFuncOffsetTable();
+  virtual std::error_code writeNameTable() override;
+
   ProfileSymbolList *ProfSymList = nullptr;
 
   // Save the start of SecLBRProfile so we can compute the offset to the
@@ -231,7 +250,8 @@ class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase {
   // FuncOffsetTable maps function name to its profile offset in SecLBRProfile
   // section. It is used to load function profile on demand.
   MapVector<StringRef, uint64_t> FuncOffsetTable;
-  std::error_code writeFuncOffsetTable();
+  // Whether to use MD5 to represent string.
+  bool UseMD5 = false;
 };
 
 // CompactBinary is a compact format of binary profile which both reduces

diff  --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 003e8d4d4296..e5d0fdba5fc4 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -30,6 +30,7 @@ using namespace sampleprof;
 namespace llvm {
 namespace sampleprof {
 SampleProfileFormat FunctionSamples::Format;
+bool FunctionSamples::UseMD5;
 } // namespace sampleprof
 } // namespace llvm
 

diff  --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 6f74cec07b96..33efb51dca07 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -470,18 +470,18 @@ std::error_code SampleProfileReaderBinary::readImpl() {
   return sampleprof_error::success;
 }
 
-std::error_code
-SampleProfileReaderExtBinary::readOneSection(const uint8_t *Start,
-                                             uint64_t Size, SecType Type) {
+std::error_code SampleProfileReaderExtBinary::readOneSection(
+    const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
   Data = Start;
   End = Start + Size;
-  switch (Type) {
+  switch (Entry.Type) {
   case SecProfSummary:
     if (std::error_code EC = readSummary())
       return EC;
     break;
   case SecNameTable:
-    if (std::error_code EC = readNameTable())
+    if (std::error_code EC = readNameTableSec(
+            hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)))
       return EC;
     break;
   case SecLBRProfile:
@@ -546,15 +546,28 @@ std::error_code SampleProfileReaderExtBinary::readFuncProfiles() {
     }
   }
 
-  for (auto NameOffset : FuncOffsetTable) {
-    auto FuncName = NameOffset.first;
-    if (!FuncsToUse.count(FuncName) &&
-        (!Remapper || !Remapper->exist(FuncName)))
-      continue;
-    const uint8_t *FuncProfileAddr = Start + NameOffset.second;
-    assert(FuncProfileAddr < End && "out of LBRProfile section");
-    if (std::error_code EC = readFuncProfile(FuncProfileAddr))
-      return EC;
+  if (useMD5()) {
+    for (auto Name : FuncsToUse) {
+      auto GUID = std::to_string(MD5Hash(Name));
+      auto iter = FuncOffsetTable.find(StringRef(GUID));
+      if (iter == FuncOffsetTable.end())
+        continue;
+      const uint8_t *FuncProfileAddr = Start + iter->second;
+      assert(FuncProfileAddr < End && "out of LBRProfile section");
+      if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+        return EC;
+    }
+  } else {
+    for (auto NameOffset : FuncOffsetTable) {
+      auto FuncName = NameOffset.first;
+      if (!FuncsToUse.count(FuncName) &&
+          (!Remapper || !Remapper->exist(FuncName)))
+        continue;
+      const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+      assert(FuncProfileAddr < End && "out of LBRProfile section");
+      if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+        return EC;
+    }
   }
 
   Data = End;
@@ -617,7 +630,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
     // DecompressBuf before reading the actual data. The pointee of
     // 'Data' will be changed to buffer hold by DecompressBuf
     // temporarily when reading the actual data.
-    bool isCompressed = hasSecFlag(Entry, SecFlagCompress);
+    bool isCompressed = hasSecFlag(Entry, SecCommonFlags::SecFlagCompress);
     if (isCompressed) {
       const uint8_t *DecompressBuf;
       uint64_t DecompressBufSize;
@@ -628,7 +641,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
       SecSize = DecompressBufSize;
     }
 
-    if (std::error_code EC = readOneSection(SecStart, SecSize, Entry.Type))
+    if (std::error_code EC = readOneSection(SecStart, SecSize, Entry))
       return EC;
     if (Data != SecStart + SecSize)
       return sampleprof_error::malformed;
@@ -705,6 +718,31 @@ std::error_code SampleProfileReaderBinary::readNameTable() {
   return sampleprof_error::success;
 }
 
+std::error_code SampleProfileReaderExtBinary::readMD5NameTable() {
+  auto Size = readNumber<uint64_t>();
+  if (std::error_code EC = Size.getError())
+    return EC;
+  NameTable.reserve(*Size);
+  MD5StringBuf = std::make_unique<std::vector<std::string>>();
+  MD5StringBuf->reserve(*Size);
+  for (uint32_t I = 0; I < *Size; ++I) {
+    auto FID = readNumber<uint64_t>();
+    if (std::error_code EC = FID.getError())
+      return EC;
+    MD5StringBuf->push_back(std::to_string(*FID));
+    // NameTable is a vector of StringRef. Here it is pushing back a
+    // StringRef initialized with the last string in MD5stringBuf.
+    NameTable.push_back(MD5StringBuf->back());
+  }
+  return sampleprof_error::success;
+}
+
+std::error_code SampleProfileReaderExtBinary::readNameTableSec(bool IsMD5) {
+  if (IsMD5)
+    return readMD5NameTable();
+  return SampleProfileReaderBinary::readNameTable();
+}
+
 std::error_code SampleProfileReaderCompactBinary::readNameTable() {
   auto Size = readNumber<uint64_t>();
   if (std::error_code EC = Size.getError())
@@ -1210,9 +1248,9 @@ bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) {
 }
 
 void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
-  // If the reader is in compact format, we can't remap it because
+  // If the reader uses MD5 to represent string, we can't remap it because
   // we don't know what the original function names were.
-  if (Reader.getFormat() == SPF_Compact_Binary) {
+  if (Reader.useMD5()) {
     Ctx.diagnose(DiagnosticInfoSampleProfile(
         Reader.getBuffer()->getBufferIdentifier(),
         "Profile data remapping cannot be applied to profile data "

diff  --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 8d09af31f94b..48d3faa6cd2f 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -87,7 +87,7 @@ uint64_t SampleProfileWriterExtBinaryBase::markSectionStart(SecType Type) {
   uint64_t SectionStart = OutputStream->tell();
   auto &Entry = getEntryInLayout(Type);
   // Use LocalBuf as a temporary output for writting data.
-  if (hasSecFlag(Entry, SecFlagCompress))
+  if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress))
     LocalBufStream.swap(OutputStream);
   return SectionStart;
 }
@@ -117,7 +117,7 @@ std::error_code
 SampleProfileWriterExtBinaryBase::addNewSection(SecType Type,
                                                 uint64_t SectionStart) {
   auto Entry = getEntryInLayout(Type);
-  if (hasSecFlag(Entry, SecFlagCompress)) {
+  if (hasSecFlag(Entry, SecCommonFlags::SecFlagCompress)) {
     LocalBufStream.swap(OutputStream);
     if (std::error_code EC = compressAndOutput())
       return EC;
@@ -166,6 +166,22 @@ std::error_code SampleProfileWriterExtBinary::writeFuncOffsetTable() {
   return sampleprof_error::success;
 }
 
+std::error_code SampleProfileWriterExtBinary::writeNameTable() {
+  if (!UseMD5)
+    return SampleProfileWriterBinary::writeNameTable();
+
+  auto &OS = *OutputStream;
+  std::set<StringRef> V;
+  stablizeNameTable(V);
+
+  // Write out the name table.
+  encodeULEB128(NameTable.size(), OS);
+  for (auto N : V) {
+    encodeULEB128(MD5Hash(N), OS);
+  }
+  return sampleprof_error::success;
+}
+
 std::error_code SampleProfileWriterExtBinary::writeSections(
     const StringMap<FunctionSamples> &ProfileMap) {
   uint64_t SectionStart = markSectionStart(SecProfSummary);
@@ -390,19 +406,11 @@ std::error_code SampleProfileWriterBinary::writeHeader(
 
 void SampleProfileWriterExtBinaryBase::setToCompressAllSections() {
   for (auto &Entry : SectionHdrLayout)
-    addSecFlags(Entry, SecFlagCompress);
+    addSecFlag(Entry, SecCommonFlags::SecFlagCompress);
 }
 
 void SampleProfileWriterExtBinaryBase::setToCompressSection(SecType Type) {
-  addSectionFlags(Type, SecFlagCompress);
-}
-
-void SampleProfileWriterExtBinaryBase::addSectionFlags(SecType Type,
-                                                       SecFlags Flags) {
-  for (auto &Entry : SectionHdrLayout) {
-    if (Entry.Type == Type)
-      addSecFlags(Entry, Flags);
-  }
+  addSectionFlag(Type, SecCommonFlags::SecFlagCompress);
 }
 
 void SampleProfileWriterExtBinaryBase::allocSecHdrTable() {

diff  --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index b5b834030a38..9c1369fdcf99 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -236,7 +236,7 @@ class GUIDToFuncNameMapper {
                         DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
       : CurrentReader(Reader), CurrentModule(M),
       CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
-    if (CurrentReader.getFormat() != SPF_Compact_Binary)
+    if (!CurrentReader.useMD5())
       return;
 
     for (const auto &F : CurrentModule) {
@@ -262,7 +262,7 @@ class GUIDToFuncNameMapper {
   }
 
   ~GUIDToFuncNameMapper() {
-    if (CurrentReader.getFormat() != SPF_Compact_Binary)
+    if (!CurrentReader.useMD5())
       return;
 
     CurrentGUIDToFuncNameMap.clear();

diff  --git a/llvm/test/Transforms/SampleProfile/Inputs/inline.md5extbinary.afdo b/llvm/test/Transforms/SampleProfile/Inputs/inline.md5extbinary.afdo
new file mode 100644
index 000000000000..c827d1b825f4
Binary files /dev/null and b/llvm/test/Transforms/SampleProfile/Inputs/inline.md5extbinary.afdo 
diff er

diff  --git a/llvm/test/Transforms/SampleProfile/profile-format.ll b/llvm/test/Transforms/SampleProfile/profile-format.ll
index 4f7ae0a06a41..c42fb5a5e0e5 100644
--- a/llvm/test/Transforms/SampleProfile/profile-format.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-format.ll
@@ -4,6 +4,8 @@
 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.compactbinary.afdo -S | FileCheck %s
 ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.extbinary.afdo -S | FileCheck %s
 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.extbinary.afdo -S | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.md5extbinary.afdo -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.md5extbinary.afdo -S | FileCheck %s
 
 ; Original C++ test case
 ;

diff  --git a/llvm/test/tools/llvm-profdata/roundtrip.test b/llvm/test/tools/llvm-profdata/roundtrip.test
index 7af76e0a5822..ce20d633a74a 100644
--- a/llvm/test/tools/llvm-profdata/roundtrip.test
+++ b/llvm/test/tools/llvm-profdata/roundtrip.test
@@ -16,3 +16,11 @@ RUN: llvm-profdata merge --sample --binary -output=%t.4.profdata %S/Inputs/sampl
 RUN: llvm-profdata merge --sample --extbinary -output=%t.5.profdata %t.4.profdata
 RUN: llvm-profdata merge --sample --text -output=%t.4.proftext %t.5.profdata
 RUN: 
diff  -b %t.4.proftext %S/Inputs/sample-profile.proftext
+# Trip from text --> extbinary --> md5text
+# Trip from text --> compbinary --> md5text
+# Compare the two md5 texts
+RUN: llvm-profdata merge --sample --compbinary -output=%t.6.profdata %S/Inputs/sample-profile.proftext
+RUN: llvm-profdata merge --sample --text -output=%t.6.proftext %S/Inputs/sample-profile.proftext
+RUN: llvm-profdata merge --sample --extbinary -use-md5 -output=%t.7.profdata %S/Inputs/sample-profile.proftext
+RUN: llvm-profdata merge --sample --text -output=%t.7.proftext %S/Inputs/sample-profile.proftext
+RUN: 
diff  -b %t.6.proftext %t.7.proftext

diff  --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index f05c7e637cd5..90ce8400c845 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -448,7 +448,7 @@ static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
                                   ProfileFormat OutputFormat,
                                   MemoryBuffer *Buffer,
                                   sampleprof::ProfileSymbolList &WriterList,
-                                  bool CompressAllSections) {
+                                  bool CompressAllSections, bool UseMD5) {
   populateProfileSymbolList(Buffer, WriterList);
   if (WriterList.size() > 0 && OutputFormat != PF_Ext_Binary)
     warn("Profile Symbol list is not empty but the output format is not "
@@ -457,22 +457,24 @@ static void handleExtBinaryWriter(sampleprof::SampleProfileWriter &Writer,
   Writer.setProfileSymbolList(&WriterList);
 
   if (CompressAllSections) {
-    if (OutputFormat != PF_Ext_Binary) {
+    if (OutputFormat != PF_Ext_Binary)
       warn("-compress-all-section is ignored. Specify -extbinary to enable it");
-    } else {
-      auto ExtBinaryWriter =
-          static_cast<sampleprof::SampleProfileWriterExtBinary *>(&Writer);
-      ExtBinaryWriter->setToCompressAllSections();
-    }
+    else
+      Writer.setToCompressAllSections();
+  }
+  if (UseMD5) {
+    if (OutputFormat != PF_Ext_Binary)
+      warn("-use-md5 is ignored. Specify -extbinary to enable it");
+    else
+      Writer.setUseMD5();
   }
 }
 
-static void mergeSampleProfile(const WeightedFileVector &Inputs,
-                               SymbolRemapper *Remapper,
-                               StringRef OutputFilename,
-                               ProfileFormat OutputFormat,
-                               StringRef ProfileSymbolListFile,
-                               bool CompressAllSections, FailureMode FailMode) {
+static void
+mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
+                   StringRef OutputFilename, ProfileFormat OutputFormat,
+                   StringRef ProfileSymbolListFile, bool CompressAllSections,
+                   bool UseMD5, FailureMode FailMode) {
   using namespace sampleprof;
   StringMap<FunctionSamples> ProfileMap;
   SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
@@ -529,7 +531,7 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
   // Make sure Buffer lives as long as WriterList.
   auto Buffer = getInputFileBuf(ProfileSymbolListFile);
   handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
-                        CompressAllSections);
+                        CompressAllSections, UseMD5);
   Writer->write(ProfileMap);
 }
 
@@ -657,6 +659,10 @@ static int merge_main(int argc, const char *argv[]) {
       "compress-all-sections", cl::init(false), cl::Hidden,
       cl::desc("Compress all sections when writing the profile (only "
                "meaningful for -extbinary)"));
+  cl::opt<bool> UseMD5(
+      "use-md5", cl::init(false), cl::Hidden,
+      cl::desc("Choose to use MD5 to represent string in name table (only "
+               "meaningful for -extbinary)"));
 
   cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
 
@@ -691,7 +697,7 @@ static int merge_main(int argc, const char *argv[]) {
   else
     mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
                        OutputFormat, ProfileSymbolListFile, CompressAllSections,
-                       FailureMode);
+                       UseMD5, FailureMode);
 
   return 0;
 }

diff  --git a/llvm/unittests/ProfileData/SampleProfTest.cpp b/llvm/unittests/ProfileData/SampleProfTest.cpp
index 866a9ec21f7a..3aaba056c618 100644
--- a/llvm/unittests/ProfileData/SampleProfTest.cpp
+++ b/llvm/unittests/ProfileData/SampleProfTest.cpp
@@ -77,11 +77,13 @@ struct SampleProfTest : ::testing::Test {
     OS->close();
   }
 
-  void testRoundTrip(SampleProfileFormat Format, bool Remap) {
+  void testRoundTrip(SampleProfileFormat Format, bool Remap, bool UseMD5) {
     SmallVector<char, 128> ProfilePath;
     ASSERT_TRUE(NoError(llvm::sys::fs::createTemporaryFile("profile", "", ProfilePath)));
     StringRef Profile(ProfilePath.data(), ProfilePath.size());
     createWriter(Format, Profile);
+    if (Format == SampleProfileFormat::SPF_Ext_Binary && UseMD5)
+      static_cast<SampleProfileWriterExtBinary *>(Writer.get())->setUseMD5();
 
     StringRef FooName("_Z3fooi");
     FunctionSamples FooSamples;
@@ -168,7 +170,7 @@ struct SampleProfTest : ::testing::Test {
 
     FunctionSamples *ReadFooSamples = Reader->getSamplesFor(FooName);
     ASSERT_TRUE(ReadFooSamples != nullptr);
-    if (Format != SampleProfileFormat::SPF_Compact_Binary) {
+    if (!UseMD5) {
       ASSERT_EQ("_Z3fooi", ReadFooSamples->getName());
     }
     ASSERT_EQ(7711u, ReadFooSamples->getTotalSamples());
@@ -176,7 +178,7 @@ struct SampleProfTest : ::testing::Test {
 
     FunctionSamples *ReadBarSamples = Reader->getSamplesFor(BarName);
     ASSERT_TRUE(ReadBarSamples != nullptr);
-    if (Format != SampleProfileFormat::SPF_Compact_Binary) {
+    if (!UseMD5) {
       ASSERT_EQ("_Z3bari", ReadBarSamples->getName());
     }
     ASSERT_EQ(20301u, ReadBarSamples->getTotalSamples());
@@ -205,10 +207,10 @@ struct SampleProfTest : ::testing::Test {
 
     std::string MconstructGUID;
     StringRef MconstructRep =
-        getRepInFormat(MconstructName, Format, MconstructGUID);
+        getRepInFormat(MconstructName, UseMD5, MconstructGUID);
     std::string StringviewGUID;
     StringRef StringviewRep =
-        getRepInFormat(StringviewName, Format, StringviewGUID);
+        getRepInFormat(StringviewName, UseMD5, StringviewGUID);
     ASSERT_EQ(1000u, CTMap.get()[MconstructRep]);
     ASSERT_EQ(437u, CTMap.get()[StringviewRep]);
 
@@ -333,31 +335,35 @@ struct SampleProfTest : ::testing::Test {
 };
 
 TEST_F(SampleProfTest, roundtrip_text_profile) {
-  testRoundTrip(SampleProfileFormat::SPF_Text, false);
+  testRoundTrip(SampleProfileFormat::SPF_Text, false, false);
 }
 
 TEST_F(SampleProfTest, roundtrip_raw_binary_profile) {
-  testRoundTrip(SampleProfileFormat::SPF_Binary, false);
+  testRoundTrip(SampleProfileFormat::SPF_Binary, false, false);
 }
 
 TEST_F(SampleProfTest, roundtrip_compact_binary_profile) {
-  testRoundTrip(SampleProfileFormat::SPF_Compact_Binary, false);
+  testRoundTrip(SampleProfileFormat::SPF_Compact_Binary, false, true);
 }
 
 TEST_F(SampleProfTest, roundtrip_ext_binary_profile) {
-  testRoundTrip(SampleProfileFormat::SPF_Ext_Binary, false);
+  testRoundTrip(SampleProfileFormat::SPF_Ext_Binary, false, false);
+}
+
+TEST_F(SampleProfTest, roundtrip_md5_ext_binary_profile) {
+  testRoundTrip(SampleProfileFormat::SPF_Ext_Binary, false, true);
 }
 
 TEST_F(SampleProfTest, remap_text_profile) {
-  testRoundTrip(SampleProfileFormat::SPF_Text, true);
+  testRoundTrip(SampleProfileFormat::SPF_Text, true, false);
 }
 
 TEST_F(SampleProfTest, remap_raw_binary_profile) {
-  testRoundTrip(SampleProfileFormat::SPF_Binary, true);
+  testRoundTrip(SampleProfileFormat::SPF_Binary, true, false);
 }
 
 TEST_F(SampleProfTest, remap_ext_binary_profile) {
-  testRoundTrip(SampleProfileFormat::SPF_Ext_Binary, true);
+  testRoundTrip(SampleProfileFormat::SPF_Ext_Binary, true, false);
 }
 
 TEST_F(SampleProfTest, sample_overflow_saturation) {


        


More information about the llvm-commits mailing list