[compiler-rt] [llvm] [InstrFDO]Record the on-disk header size in indexed profile header (PR #88212)

via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 9 17:20:15 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-pgo

Author: Mingming Liu (minglotus-6)

<details>
<summary>Changes</summary>

Currently, the indexed profile format does not have forward compatibility. This is sub-optimal for usability, see an example below. 

This change proposes to record the on-disk header size in the indexed profiles at a fixed byte offset. This way profile header size is a part of profile rather than a part of profile reader library. 

 - Currently, the indexed profile reader relies on the C++ struct [definition](https://github.com/llvm/llvm-project/blob/d97d560fbf6ed26a198b3afe1594d7d63b88ab3a/llvm/include/llvm/ProfileData/InstrProf.h#L1158-L1191) of Header [to know the byte size of the profile header](https://github.com/llvm/llvm-project/blob/d97d560fbf6ed26a198b3afe1594d7d63b88ab3a/llvm/lib/ProfileData/InstrProf.cpp#L1648-L1672), which is baked in the binary.  Thereby older compilers won't be able to parse profiles correctly if any new field is added (for new payload sections) in the header C++ struct.
- In the real world, users might profile their instrumented binaries on a server farm and use orchestration services to merge profiles, and then feedback the merged profiles for optimized builds to compilers. The release cycle for compilers and orchestration tools might be different and not easy to coordinate.
   - Say a commit `X` introduces a new section `S` and bumps an indexed profile version from `V` to `V+1`.  Without header forward compatibility, things could break in various ways.  For instance, if the orchestration tool generates profiles of `V+1` but compiler release falls behind, the compiler cannot parse the post-processed indexed profiles to generated FDO-optimized binaries. A better choice is to make the compiler parse the known profile sections and ignore the new unknown one to proceed with optimized build. 


---
Full diff: https://github.com/llvm/llvm-project/pull/88212.diff


6 Files Affected:

- (modified) compiler-rt/include/profile/InstrProfData.inc (+1-1) 
- (modified) llvm/include/llvm/ProfileData/InstrProf.h (+6-1) 
- (modified) llvm/include/llvm/ProfileData/InstrProfData.inc (+1-1) 
- (modified) llvm/lib/ProfileData/InstrProf.cpp (+28-17) 
- (modified) llvm/lib/ProfileData/InstrProfWriter.cpp (+35-27) 
- (modified) llvm/test/tools/llvm-profdata/profile-version.test (+2-2) 


``````````diff
diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index e9866d94b762c1..36fb7fe989c391 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -710,7 +710,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 /* Raw profile format version (start from 1). */
 #define INSTR_PROF_RAW_VERSION 10
 /* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 12
+#define INSTR_PROF_INDEX_VERSION 13
 /* Coverage mapping format version (start from 0). */
 #define INSTR_PROF_COVMAP_VERSION 6
 
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index eb3c10bcba1ca7..8399a3340ccef9 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -1146,7 +1146,9 @@ enum ProfVersion {
   Version11 = 11,
   // VTable profiling,
   Version12 = 12,
-  // The current version is 12.
+  // Record the on-disk byte size of header.
+  Version13 = 13,
+  // The current version is 13.
   CurrentVersion = INSTR_PROF_INDEX_VERSION
 };
 const uint64_t Version = ProfVersion::CurrentVersion;
@@ -1155,6 +1157,7 @@ const HashT HashType = HashT::MD5;
 
 inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); }
 
+constexpr unsigned kHeaderFieldSize = 10;
 // This structure defines the file header of the LLVM profile
 // data file in indexed-format. Please update llvm/docs/InstrProfileFormat.rst
 // as appropriate when updating the indexed profile format.
@@ -1174,6 +1177,8 @@ struct Header {
   uint64_t BinaryIdOffset;
   uint64_t TemporalProfTracesOffset;
   uint64_t VTableNamesOffset;
+  // The on-disk byte size of the header.
+  uint64_t Size;
   // New fields should only be added at the end to ensure that the size
   // computation is correct. The methods below need to be updated to ensure that
   // the new field is read correctly.
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index e9866d94b762c1..36fb7fe989c391 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -710,7 +710,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 /* Raw profile format version (start from 1). */
 #define INSTR_PROF_RAW_VERSION 10
 /* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 12
+#define INSTR_PROF_INDEX_VERSION 13
 /* Coverage mapping format version (start from 0). */
 #define INSTR_PROF_COVMAP_VERSION 6
 
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 95f900d0fff1ca..c85520f523abcb 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1598,11 +1598,10 @@ uint64_t Header::formatVersion() const {
 Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
   using namespace support;
   static_assert(std::is_standard_layout_v<Header>,
-                "The header should be standard layout type since we use offset "
-                "of fields to read.");
-  Header H;
+                "Keep the header a standard-layout class for simplicity");
 
-  H.Magic = read(Buffer, offsetOf(&Header::Magic));
+  Header H;
+  H.Magic = read(Buffer, 0);
   // Check the magic number.
   uint64_t Magic =
       endian::byte_swap<uint64_t, llvm::endianness::little>(H.Magic);
@@ -1610,36 +1609,46 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
     return make_error<InstrProfError>(instrprof_error::bad_magic);
 
   // Read the version.
-  H.Version = read(Buffer, offsetOf(&Header::Version));
-  if (GET_VERSION(H.formatVersion()) >
-      IndexedInstrProf::ProfVersion::CurrentVersion)
+  H.Version = read(Buffer, sizeof(uint64_t));
+  const uint64_t ProfVersion = GET_VERSION(H.formatVersion());
+  if (ProfVersion > IndexedInstrProf::ProfVersion::CurrentVersion)
     return make_error<InstrProfError>(instrprof_error::unsupported_version);
 
-  switch (GET_VERSION(H.formatVersion())) {
+  size_t FieldByteOffset = H.size() - sizeof(uint64_t);
+
+  switch (ProfVersion) {
     // When a new field is added in the header add a case statement here to
     // populate it.
     static_assert(
-        IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
+        IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
         "Please update the reading code below if a new field has been added, "
         "if not add a case statement to fall through to the latest version.");
+  case 13ull:
+    FieldByteOffset -= sizeof(uint64_t);
+    [[fallthrough]];
   case 12ull:
-    H.VTableNamesOffset = read(Buffer, offsetOf(&Header::VTableNamesOffset));
+    H.VTableNamesOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 11ull:
     [[fallthrough]];
   case 10ull:
-    H.TemporalProfTracesOffset =
-        read(Buffer, offsetOf(&Header::TemporalProfTracesOffset));
+    H.TemporalProfTracesOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 9ull:
-    H.BinaryIdOffset = read(Buffer, offsetOf(&Header::BinaryIdOffset));
+    H.BinaryIdOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 8ull:
-    H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset));
+    H.MemProfOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   default: // Version7 (when the backwards compatible header was introduced).
-    H.HashType = read(Buffer, offsetOf(&Header::HashType));
-    H.HashOffset = read(Buffer, offsetOf(&Header::HashOffset));
+    H.HashOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
+    H.HashType = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
   }
 
   return H;
@@ -1650,10 +1659,12 @@ size_t Header::size() const {
     // When a new field is added to the header add a case statement here to
     // compute the size as offset of the new field + size of the new field. This
     // relies on the field being added to the end of the list.
-    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
+    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
                   "Please update the size computation below if a new field has "
                   "been added to the header, if not add a case statement to "
                   "fall through to the latest version.");
+  case 13ull:
+    return sizeof(uint64_t) * kHeaderFieldSize;
   case 12ull:
     return offsetOf(&Header::VTableNamesOffset) +
            sizeof(Header::VTableNamesOffset);
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 7c56cde3e6cedd..837655656f9d4d 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -576,12 +576,18 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   IndexedInstrProf::Header Header;
   Header.Magic = IndexedInstrProf::Magic;
   Header.Version = WritePrevVersion
-                       ? IndexedInstrProf::ProfVersion::Version11
+                       ? IndexedInstrProf::ProfVersion::Version12
                        : IndexedInstrProf::ProfVersion::CurrentVersion;
   // The WritePrevVersion handling will either need to be removed or updated
-  // if the version is advanced beyond 12.
+  // if the version is advanced beyond 13.
+  // Starting from version 13, an indexed profile records the on-disk
+  // byte size of header at a fixed byte offset. Compilers or tools built
+  // starting from this version can read the on-disk byte size (as opposed to
+  // relying on struct definition of Header) to locate the start of payload
+  // sections.
+  // FIXME: Clean up WritePrevVersion later.
   assert(IndexedInstrProf::ProfVersion::CurrentVersion ==
-         IndexedInstrProf::ProfVersion::Version12);
+         IndexedInstrProf::ProfVersion::Version13);
   if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
     Header.Version |= VARIANT_MASK_IR_PROF;
   if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
@@ -606,6 +612,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   Header.TemporalProfTracesOffset = 0;
   Header.VTableNamesOffset = 0;
 
+  const uint64_t StartOffset = OS.tell();
   // Only write out the first four fields. We need to remember the offset of the
   // remaining fields to allow back patching later.
   for (int I = 0; I < 4; I++)
@@ -633,8 +640,10 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   OS.write(0);
 
   uint64_t VTableNamesOffset = OS.tell();
+  OS.write(0);
+
   if (!WritePrevVersion)
-    OS.write(0);
+    OS.write(OS.tell() - StartOffset);
 
   // Reserve space to write profile summary data.
   uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
@@ -703,34 +712,32 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
 
   uint64_t VTableNamesSectionStart = OS.tell();
 
-  if (!WritePrevVersion) {
-    std::vector<std::string> VTableNameStrs;
-    for (StringRef VTableName : VTableNames.keys())
-      VTableNameStrs.push_back(VTableName.str());
-
-    std::string CompressedVTableNames;
-    if (!VTableNameStrs.empty())
-      if (Error E = collectGlobalObjectNameStrings(
-              VTableNameStrs, compression::zlib::isAvailable(),
-              CompressedVTableNames))
-        return E;
+  std::vector<std::string> VTableNameStrs;
+  for (StringRef VTableName : VTableNames.keys())
+    VTableNameStrs.push_back(VTableName.str());
 
-    const uint64_t CompressedStringLen = CompressedVTableNames.length();
+  std::string CompressedVTableNames;
+  if (!VTableNameStrs.empty())
+    if (Error E = collectGlobalObjectNameStrings(
+            VTableNameStrs, compression::zlib::isAvailable(),
+            CompressedVTableNames))
+      return E;
 
-    // Record the length of compressed string.
-    OS.write(CompressedStringLen);
+  const uint64_t CompressedStringLen = CompressedVTableNames.length();
 
-    // Write the chars in compressed strings.
-    for (auto &c : CompressedVTableNames)
-      OS.writeByte(static_cast<uint8_t>(c));
+  // Record the length of compressed string.
+  OS.write(CompressedStringLen);
 
-    // Pad up to a multiple of 8.
-    // InstrProfReader could read bytes according to 'CompressedStringLen'.
-    const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);
+  // Write the chars in compressed strings.
+  for (auto &c : CompressedVTableNames)
+    OS.writeByte(static_cast<uint8_t>(c));
 
-    for (uint64_t K = CompressedStringLen; K < PaddedLength; K++)
-      OS.writeByte(0);
-  }
+  // Pad up to a multiple of 8.
+  // InstrProfReader could read bytes according to 'CompressedStringLen'.
+  const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);
+
+  for (uint64_t K = CompressedStringLen; K < PaddedLength; K++)
+    OS.writeByte(0);
 
   uint64_t TemporalProfTracesSectionStart = 0;
   if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {
@@ -797,6 +804,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
         // Patch the Header.TemporalProfTracesOffset (=0 for profiles without
         // traces).
         {TemporalProfTracesOffset, &TemporalProfTracesSectionStart, 1},
+        {VTableNamesOffset, &VTableNamesSectionStart, 1},
         // Patch the summary data.
         {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
          (int)(SummarySize / sizeof(uint64_t))},
diff --git a/llvm/test/tools/llvm-profdata/profile-version.test b/llvm/test/tools/llvm-profdata/profile-version.test
index cb68a648d5e5aa..b5405cdec46248 100644
--- a/llvm/test/tools/llvm-profdata/profile-version.test
+++ b/llvm/test/tools/llvm-profdata/profile-version.test
@@ -2,8 +2,8 @@ Test the profile version.
 
 RUN: llvm-profdata merge -o %t.profdata %p/Inputs/basic.proftext
 RUN: llvm-profdata show --profile-version %t.profdata | FileCheck %s
-CHECK: Profile version: 12
+CHECK: Profile version: 13
 
 RUN: llvm-profdata merge -o %t.prev.profdata %p/Inputs/basic.proftext --write-prev-version
 RUN: llvm-profdata show --profile-version %t.prev.profdata | FileCheck %s --check-prefix=PREV
-PREV: Profile version: 11
+PREV: Profile version: 12

``````````

</details>


https://github.com/llvm/llvm-project/pull/88212


More information about the llvm-commits mailing list