[llvm-branch-commits] [llvm] 64e7685 - [SampleFDO] Store fixed length MD5 in NameTable instead of using ULEB128 if

Wei Mi via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Dec 8 16:26:12 PST 2020


Author: Wei Mi
Date: 2020-12-08T16:21:01-08:00
New Revision: 64e7685368894742517524878716184a8cd3ba9b

URL: https://github.com/llvm/llvm-project/commit/64e7685368894742517524878716184a8cd3ba9b
DIFF: https://github.com/llvm/llvm-project/commit/64e7685368894742517524878716184a8cd3ba9b.diff

LOG: [SampleFDO] Store fixed length MD5 in NameTable instead of using ULEB128 if
MD5 is used.

Currently during sample profile loading, NameTable has to be loaded entirely
up front before any name string is retrieved. That is because NameTable is
stored using ULEB128 encoding and cannot be directly accessed like an array.
However, if MD5 is used to represent name in the NameTable, it has fixed
length. If MD5 names are stored in uint64_t type instead of ULEB128, NameTable
can be accessed like an array then in many cases only part of the NameTable
has to be read. This is helpful for reducing compile time especially when
small source file is compiled. We find that after this change, the elapsed
time to build a large application distributively is reduced by 5% and the
accumulative cpu time used for building is also reduced by 5%. The size of
the profile is slightly reduced with this change by ~0.2%, and that also
indicates encoding MD5 in ULEB128 doesn't save the storage space.

Differential Revision: https://reviews.llvm.org/D92621

Added: 
    llvm/test/Transforms/SampleProfile/Inputs/inline.fixlenmd5.extbinary.afdo

Modified: 
    llvm/include/llvm/ProfileData/SampleProf.h
    llvm/include/llvm/ProfileData/SampleProfReader.h
    llvm/include/llvm/ProfileData/SampleProfWriter.h
    llvm/lib/ProfileData/SampleProfReader.cpp
    llvm/lib/ProfileData/SampleProfWriter.cpp
    llvm/test/Transforms/SampleProfile/profile-format.ll
    llvm/test/tools/llvm-profdata/show-prof-info.test

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 0c40bc00a8f6..f6dd496d9f57 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -165,7 +165,10 @@ enum class SecCommonFlags : uint32_t {
 // a new check in verifySecFlag.
 enum class SecNameTableFlags : uint32_t {
   SecFlagInValid = 0,
-  SecFlagMD5Name = (1 << 0)
+  SecFlagMD5Name = (1 << 0),
+  // Store MD5 in fixed length instead of ULEB128 so NameTable can be
+  // accessed like an array.
+  SecFlagFixedLengthMD5 = (1 << 1)
 };
 enum class SecProfSummaryFlags : uint32_t {
   SecFlagInValid = 0,

diff  --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index f4bdffd2cfe6..97b932c781bc 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -618,6 +618,7 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
                                          const SecHdrTableEntry &Entry);
   // placeholder for subclasses to dispatch their own section readers.
   virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0;
+  virtual ErrorOr<StringRef> readStringFromTable() override;
 
   std::unique_ptr<ProfileSymbolList> ProfSymList;
 
@@ -629,6 +630,12 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   /// Use all functions from the input profile.
   bool UseAllFuncs = true;
 
+  /// Use fixed length MD5 instead of ULEB128 encoding so NameTable doesn't
+  /// need to be read in up front and can be directly accessed using index.
+  bool FixedLengthMD5 = false;
+  /// The starting address of NameTable containing fixed length MD5.
+  const uint8_t *MD5NameMemStart = nullptr;
+
   /// If MD5 is used in NameTable section, the section saves uint64_t data.
   /// The uint64_t data has to be converted to a string and then the string
   /// will be used to initialize StringRef in NameTable.
@@ -656,10 +663,7 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
   void collectFuncsFrom(const Module &M) override;
 
   /// Return whether names in the profile are all MD5 numbers.
-  virtual bool useMD5() override {
-    assert(!NameTable.empty() && "NameTable should have been initialized");
-    return MD5StringBuf && !MD5StringBuf->empty();
-  }
+  virtual bool useMD5() override { return MD5StringBuf.get(); }
 
   virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
     return std::move(ProfSymList);

diff  --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h
index 24269c5fbbf5..cae39518e0af 100644
--- a/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -158,6 +158,9 @@ class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary {
   virtual void setUseMD5() override {
     UseMD5 = true;
     addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagMD5Name);
+    // MD5 will be stored as plain uint64_t instead of variable-length
+    // quantity format in NameTable section.
+    addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagFixedLengthMD5);
   }
 
   // Set the profile to be partial. It means the profile is for

diff  --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index be6d3e1c506c..6a574ffacd0d 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -367,6 +367,34 @@ ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
   return NameTable[*Idx];
 }
 
+ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
+  if (!FixedLengthMD5)
+    return SampleProfileReaderBinary::readStringFromTable();
+
+  // read NameTable index.
+  auto Idx = readStringIndex(NameTable);
+  if (std::error_code EC = Idx.getError())
+    return EC;
+
+  // Check whether the name to be accessed has been accessed before,
+  // if not, read it from memory directly.
+  StringRef &SR = NameTable[*Idx];
+  if (SR.empty()) {
+    const uint8_t *SavedData = Data;
+    Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
+    auto FID = readUnencodedNumber<uint64_t>();
+    if (std::error_code EC = FID.getError())
+      return EC;
+    // Save the string converted from uint64_t in MD5StringBuf. All the
+    // references to the name are all StringRefs refering to the string
+    // in MD5StringBuf.
+    MD5StringBuf->push_back(std::to_string(*FID));
+    SR = MD5StringBuf->back();
+    Data = SavedData;
+  }
+  return SR;
+}
+
 ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
   auto Idx = readStringIndex(NameTable);
   if (std::error_code EC = Idx.getError())
@@ -494,11 +522,16 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagPartial))
       Summary->setPartialProfile(true);
     break;
-  case SecNameTable:
-    if (std::error_code EC = readNameTableSec(
-            hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name)))
+  case SecNameTable: {
+    FixedLengthMD5 =
+        hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
+    bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
+    assert((!FixedLengthMD5 || UseMD5) &&
+           "If FixedLengthMD5 is true, UseMD5 has to be true");
+    if (std::error_code EC = readNameTableSec(UseMD5))
       return EC;
     break;
+  }
   case SecLBRProfile:
     if (std::error_code EC = readFuncProfiles())
       return EC;
@@ -739,9 +772,20 @@ std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
   auto Size = readNumber<uint64_t>();
   if (std::error_code EC = Size.getError())
     return EC;
-  NameTable.reserve(*Size);
   MD5StringBuf = std::make_unique<std::vector<std::string>>();
   MD5StringBuf->reserve(*Size);
+  if (FixedLengthMD5) {
+    // Preallocate and initialize NameTable so we can check whether a name
+    // index has been read before by checking whether the element in the
+    // NameTable is empty, meanwhile readStringIndex can do the boundary
+    // check using the size of NameTable.
+    NameTable.resize(*Size + NameTable.size());
+
+    MD5NameMemStart = Data;
+    Data = Data + (*Size) * sizeof(uint64_t);
+    return sampleprof_error::success;
+  }
+  NameTable.reserve(*Size);
   for (uint32_t I = 0; I < *Size; ++I) {
     auto FID = readNumber<uint64_t>();
     if (std::error_code EC = FID.getError())
@@ -857,7 +901,9 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
 
   switch (Entry.Type) {
   case SecNameTable:
-    if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
+    if (hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5))
+      Flags.append("fixlenmd5,");
+    else if (hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name))
       Flags.append("md5,");
     break;
   case SecProfSummary:

diff  --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index dbb4473f068a..0264210fc206 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -174,11 +174,13 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTable() {
   std::set<StringRef> V;
   stablizeNameTable(V);
 
-  // Write out the name table.
+  // Write out the MD5 name table. We wrote unencoded MD5 so reader can
+  // retrieve the name using the name index without having to read the
+  // whole name table.
   encodeULEB128(NameTable.size(), OS);
-  for (auto N : V) {
-    encodeULEB128(MD5Hash(N), OS);
-  }
+  support::endian::Writer Writer(OS, support::little);
+  for (auto N : V)
+    Writer.write(MD5Hash(N));
   return sampleprof_error::success;
 }
 

diff  --git a/llvm/test/Transforms/SampleProfile/Inputs/inline.fixlenmd5.extbinary.afdo b/llvm/test/Transforms/SampleProfile/Inputs/inline.fixlenmd5.extbinary.afdo
new file mode 100644
index 000000000000..9db07bb5b7fa
Binary files /dev/null and b/llvm/test/Transforms/SampleProfile/Inputs/inline.fixlenmd5.extbinary.afdo 
diff er

diff  --git a/llvm/test/Transforms/SampleProfile/profile-format.ll b/llvm/test/Transforms/SampleProfile/profile-format.ll
index f00bc73511d8..3809a9618a39 100644
--- a/llvm/test/Transforms/SampleProfile/profile-format.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-format.ll
@@ -6,6 +6,8 @@
 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.extbinary.afdo -S | FileCheck %s
 ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.md5extbinary.afdo -S | FileCheck %s
 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.md5extbinary.afdo -S | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.fixlenmd5.extbinary.afdo -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.fixlenmd5.extbinary.afdo -S | FileCheck %s
 
 ; Original C++ test case
 ;

diff  --git a/llvm/test/tools/llvm-profdata/show-prof-info.test b/llvm/test/tools/llvm-profdata/show-prof-info.test
index 4a748667d1ad..2b38dee5d865 100644
--- a/llvm/test/tools/llvm-profdata/show-prof-info.test
+++ b/llvm/test/tools/llvm-profdata/show-prof-info.test
@@ -7,6 +7,6 @@ REQUIRES: zlib
 ; To check llvm-profdata shows the correct flags for ProfileSummarySection.
 ; CHECK: ProfileSummarySection {{.*}} Flags: {compressed,partial}
 ; To check llvm-profdata shows the correct flags for NameTableSection.
-; CHECK: NameTableSection {{.*}} Flags: {compressed,md5}
+; CHECK: NameTableSection {{.*}} Flags: {compressed,fixlenmd5}
 ; To check llvm-profdata shows the correct file size.
 ; CHECK: [[FILESIZE]]


        


More information about the llvm-branch-commits mailing list