[compiler-rt] [llvm] [InstrFDO]Record the on-disk header size in indexed profile header (PR #88212)

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 15 11:53:41 PDT 2024


https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/88212

>From ee8390560bb48c95a9dddf63fb9313ffe99b648b Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 9 Apr 2024 16:45:30 -0700
Subject: [PATCH 1/5] [InstrFDO]Record the on-disk header size in indexed
 profile header

---
 compiler-rt/include/profile/InstrProfData.inc |  2 +-
 llvm/include/llvm/ProfileData/InstrProf.h     |  6 +-
 .../llvm/ProfileData/InstrProfData.inc        |  2 +-
 llvm/lib/ProfileData/InstrProf.cpp            | 46 ++++++++------
 llvm/lib/ProfileData/InstrProfReader.cpp      |  2 +-
 llvm/lib/ProfileData/InstrProfWriter.cpp      | 60 ++++++++++---------
 .../tools/llvm-profdata/profile-version.test  |  4 +-
 7 files changed, 72 insertions(+), 50 deletions(-)

diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index e9866d94b762c1..36fb7fe989c391 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -710,7 +710,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 /* Raw profile format version (start from 1). */
 #define INSTR_PROF_RAW_VERSION 10
 /* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 12
+#define INSTR_PROF_INDEX_VERSION 13
 /* Coverage mapping format version (start from 0). */
 #define INSTR_PROF_COVMAP_VERSION 6
 
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index eb3c10bcba1ca7..7072a20e083db5 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -1146,7 +1146,9 @@ enum ProfVersion {
   Version11 = 11,
   // VTable profiling,
   Version12 = 12,
-  // The current version is 12.
+  // Record the on-disk byte size of header.
+  Version13 = 13,
+  // The current version is 13.
   CurrentVersion = INSTR_PROF_INDEX_VERSION
 };
 const uint64_t Version = ProfVersion::CurrentVersion;
@@ -1174,6 +1176,8 @@ struct Header {
   uint64_t BinaryIdOffset;
   uint64_t TemporalProfTracesOffset;
   uint64_t VTableNamesOffset;
+  // The byte size of on-disk header.
+  uint64_t Size;
   // New fields should only be added at the end to ensure that the size
   // computation is correct. The methods below need to be updated to ensure that
   // the new field is read correctly.
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index e9866d94b762c1..36fb7fe989c391 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -710,7 +710,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 /* Raw profile format version (start from 1). */
 #define INSTR_PROF_RAW_VERSION 10
 /* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 12
+#define INSTR_PROF_INDEX_VERSION 13
 /* Coverage mapping format version (start from 0). */
 #define INSTR_PROF_COVMAP_VERSION 6
 
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 95f900d0fff1ca..200ae377e81abc 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1598,11 +1598,11 @@ uint64_t Header::formatVersion() const {
 Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
   using namespace support;
   static_assert(std::is_standard_layout_v<Header>,
-                "The header should be standard layout type since we use offset "
-                "of fields to read.");
-  Header H;
+                "Keep the header a standard-layout class for simplicity");
 
-  H.Magic = read(Buffer, offsetOf(&Header::Magic));
+  Header H;
+  size_t BufferOffset = 0;
+  H.Magic = read(Buffer, BufferOffset);
   // Check the magic number.
   uint64_t Magic =
       endian::byte_swap<uint64_t, llvm::endianness::little>(H.Magic);
@@ -1610,36 +1610,46 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
     return make_error<InstrProfError>(instrprof_error::bad_magic);
 
   // Read the version.
-  H.Version = read(Buffer, offsetOf(&Header::Version));
-  if (GET_VERSION(H.formatVersion()) >
-      IndexedInstrProf::ProfVersion::CurrentVersion)
+  H.Version = read(Buffer, BufferOffset + sizeof(uint64_t));
+  const uint64_t ProfVersion = GET_VERSION(H.formatVersion());
+  if (ProfVersion > IndexedInstrProf::ProfVersion::CurrentVersion)
     return make_error<InstrProfError>(instrprof_error::unsupported_version);
 
-  switch (GET_VERSION(H.formatVersion())) {
+  size_t FieldByteOffset = H.size() - sizeof(uint64_t);
+
+  switch (ProfVersion) {
     // When a new field is added in the header add a case statement here to
     // populate it.
     static_assert(
-        IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
+        IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
         "Please update the reading code below if a new field has been added, "
         "if not add a case statement to fall through to the latest version.");
+  case 13ull:
+    FieldByteOffset -= sizeof(uint64_t);
+    [[fallthrough]];
   case 12ull:
-    H.VTableNamesOffset = read(Buffer, offsetOf(&Header::VTableNamesOffset));
+    H.VTableNamesOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 11ull:
     [[fallthrough]];
   case 10ull:
-    H.TemporalProfTracesOffset =
-        read(Buffer, offsetOf(&Header::TemporalProfTracesOffset));
+    H.TemporalProfTracesOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 9ull:
-    H.BinaryIdOffset = read(Buffer, offsetOf(&Header::BinaryIdOffset));
+    H.BinaryIdOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 8ull:
-    H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset));
+    H.MemProfOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   default: // Version7 (when the backwards compatible header was introduced).
-    H.HashType = read(Buffer, offsetOf(&Header::HashType));
-    H.HashOffset = read(Buffer, offsetOf(&Header::HashOffset));
+    H.HashOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
+    H.HashType = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
   }
 
   return H;
@@ -1650,10 +1660,12 @@ size_t Header::size() const {
     // When a new field is added to the header add a case statement here to
     // compute the size as offset of the new field + size of the new field. This
     // relies on the field being added to the end of the list.
-    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
+    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
                   "Please update the size computation below if a new field has "
                   "been added to the header, if not add a case statement to "
                   "fall through to the latest version.");
+  case 13ull:
+    return sizeof(uint64_t) * 10;
   case 12ull:
     return offsetOf(&Header::VTableNamesOffset) +
            sizeof(Header::VTableNamesOffset);
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 884334ed070e8d..fef71f309ddd30 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1215,7 +1215,7 @@ Error IndexedInstrProfReader::readHeader() {
   if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
     return error(instrprof_error::truncated);
 
-  auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Start);
+  auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Cur);
   if (!HeaderOr)
     return HeaderOr.takeError();
 
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 7c56cde3e6cedd..0c13b734746fdc 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -576,12 +576,16 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   IndexedInstrProf::Header Header;
   Header.Magic = IndexedInstrProf::Magic;
   Header.Version = WritePrevVersion
-                       ? IndexedInstrProf::ProfVersion::Version11
+                       ? IndexedInstrProf::ProfVersion::Version12
                        : IndexedInstrProf::ProfVersion::CurrentVersion;
   // The WritePrevVersion handling will either need to be removed or updated
-  // if the version is advanced beyond 12.
+  // if the version is advanced beyond 13.
+  // FIXME: Starting from version 13, indexed profile header has forward
+  // compatibility. Older tools can parse header and finds the starting byte for
+  // payload when reading profiles of newer version. By then we can clean up
+  // WritePrevVersion.
   assert(IndexedInstrProf::ProfVersion::CurrentVersion ==
-         IndexedInstrProf::ProfVersion::Version12);
+         IndexedInstrProf::ProfVersion::Version13);
   if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
     Header.Version |= VARIANT_MASK_IR_PROF;
   if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
@@ -606,6 +610,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   Header.TemporalProfTracesOffset = 0;
   Header.VTableNamesOffset = 0;
 
+  const uint64_t StartOffset = OS.tell();
   // Only write out the first four fields. We need to remember the offset of the
   // remaining fields to allow back patching later.
   for (int I = 0; I < 4; I++)
@@ -633,8 +638,10 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   OS.write(0);
 
   uint64_t VTableNamesOffset = OS.tell();
+  OS.write(0);
+
   if (!WritePrevVersion)
-    OS.write(0);
+    OS.write(OS.tell() - StartOffset);
 
   // Reserve space to write profile summary data.
   uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
@@ -703,34 +710,32 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
 
   uint64_t VTableNamesSectionStart = OS.tell();
 
-  if (!WritePrevVersion) {
-    std::vector<std::string> VTableNameStrs;
-    for (StringRef VTableName : VTableNames.keys())
-      VTableNameStrs.push_back(VTableName.str());
-
-    std::string CompressedVTableNames;
-    if (!VTableNameStrs.empty())
-      if (Error E = collectGlobalObjectNameStrings(
-              VTableNameStrs, compression::zlib::isAvailable(),
-              CompressedVTableNames))
-        return E;
+  std::vector<std::string> VTableNameStrs;
+  for (StringRef VTableName : VTableNames.keys())
+    VTableNameStrs.push_back(VTableName.str());
 
-    const uint64_t CompressedStringLen = CompressedVTableNames.length();
+  std::string CompressedVTableNames;
+  if (!VTableNameStrs.empty())
+    if (Error E = collectGlobalObjectNameStrings(
+            VTableNameStrs, compression::zlib::isAvailable(),
+            CompressedVTableNames))
+      return E;
 
-    // Record the length of compressed string.
-    OS.write(CompressedStringLen);
+  const uint64_t CompressedStringLen = CompressedVTableNames.length();
 
-    // Write the chars in compressed strings.
-    for (auto &c : CompressedVTableNames)
-      OS.writeByte(static_cast<uint8_t>(c));
+  // Record the length of compressed string.
+  OS.write(CompressedStringLen);
 
-    // Pad up to a multiple of 8.
-    // InstrProfReader could read bytes according to 'CompressedStringLen'.
-    const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);
+  // Write the chars in compressed strings.
+  for (auto &c : CompressedVTableNames)
+    OS.writeByte(static_cast<uint8_t>(c));
 
-    for (uint64_t K = CompressedStringLen; K < PaddedLength; K++)
-      OS.writeByte(0);
-  }
+  // Pad up to a multiple of 8.
+  // InstrProfReader could read bytes according to 'CompressedStringLen'.
+  const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);
+
+  for (uint64_t K = CompressedStringLen; K < PaddedLength; K++)
+    OS.writeByte(0);
 
   uint64_t TemporalProfTracesSectionStart = 0;
   if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {
@@ -797,6 +802,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
         // Patch the Header.TemporalProfTracesOffset (=0 for profiles without
         // traces).
         {TemporalProfTracesOffset, &TemporalProfTracesSectionStart, 1},
+        {VTableNamesOffset, &VTableNamesSectionStart, 1},
         // Patch the summary data.
         {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
          (int)(SummarySize / sizeof(uint64_t))},
diff --git a/llvm/test/tools/llvm-profdata/profile-version.test b/llvm/test/tools/llvm-profdata/profile-version.test
index cb68a648d5e5aa..b5405cdec46248 100644
--- a/llvm/test/tools/llvm-profdata/profile-version.test
+++ b/llvm/test/tools/llvm-profdata/profile-version.test
@@ -2,8 +2,8 @@ Test the profile version.
 
 RUN: llvm-profdata merge -o %t.profdata %p/Inputs/basic.proftext
 RUN: llvm-profdata show --profile-version %t.profdata | FileCheck %s
-CHECK: Profile version: 12
+CHECK: Profile version: 13
 
 RUN: llvm-profdata merge -o %t.prev.profdata %p/Inputs/basic.proftext --write-prev-version
 RUN: llvm-profdata show --profile-version %t.prev.profdata | FileCheck %s --check-prefix=PREV
-PREV: Profile version: 11
+PREV: Profile version: 12

>From a948d2890c0e198ba77d1c55e8c75703763e774f Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 9 Apr 2024 17:01:47 -0700
Subject: [PATCH 2/5] polish

---
 llvm/include/llvm/ProfileData/InstrProf.h |  3 ++-
 llvm/lib/ProfileData/InstrProf.cpp        |  7 +++----
 llvm/lib/ProfileData/InstrProfReader.cpp  |  2 +-
 llvm/lib/ProfileData/InstrProfWriter.cpp  | 10 ++++++----
 4 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 7072a20e083db5..8399a3340ccef9 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -1157,6 +1157,7 @@ const HashT HashType = HashT::MD5;
 
 inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); }
 
+constexpr unsigned kHeaderFieldSize = 10;
 // This structure defines the file header of the LLVM profile
 // data file in indexed-format. Please update llvm/docs/InstrProfileFormat.rst
 // as appropriate when updating the indexed profile format.
@@ -1176,7 +1177,7 @@ struct Header {
   uint64_t BinaryIdOffset;
   uint64_t TemporalProfTracesOffset;
   uint64_t VTableNamesOffset;
-  // The byte size of on-disk header.
+  // The on-disk byte size of the header.
   uint64_t Size;
   // New fields should only be added at the end to ensure that the size
   // computation is correct. The methods below need to be updated to ensure that
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 200ae377e81abc..c85520f523abcb 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1601,8 +1601,7 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
                 "Keep the header a standard-layout class for simplicity");
 
   Header H;
-  size_t BufferOffset = 0;
-  H.Magic = read(Buffer, BufferOffset);
+  H.Magic = read(Buffer, 0);
   // Check the magic number.
   uint64_t Magic =
       endian::byte_swap<uint64_t, llvm::endianness::little>(H.Magic);
@@ -1610,7 +1609,7 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
     return make_error<InstrProfError>(instrprof_error::bad_magic);
 
   // Read the version.
-  H.Version = read(Buffer, BufferOffset + sizeof(uint64_t));
+  H.Version = read(Buffer, sizeof(uint64_t));
   const uint64_t ProfVersion = GET_VERSION(H.formatVersion());
   if (ProfVersion > IndexedInstrProf::ProfVersion::CurrentVersion)
     return make_error<InstrProfError>(instrprof_error::unsupported_version);
@@ -1665,7 +1664,7 @@ size_t Header::size() const {
                   "been added to the header, if not add a case statement to "
                   "fall through to the latest version.");
   case 13ull:
-    return sizeof(uint64_t) * 10;
+    return sizeof(uint64_t) * kHeaderFieldSize;
   case 12ull:
     return offsetOf(&Header::VTableNamesOffset) +
            sizeof(Header::VTableNamesOffset);
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index fef71f309ddd30..884334ed070e8d 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1215,7 +1215,7 @@ Error IndexedInstrProfReader::readHeader() {
   if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
     return error(instrprof_error::truncated);
 
-  auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Cur);
+  auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Start);
   if (!HeaderOr)
     return HeaderOr.takeError();
 
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 0c13b734746fdc..837655656f9d4d 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -580,10 +580,12 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
                        : IndexedInstrProf::ProfVersion::CurrentVersion;
   // The WritePrevVersion handling will either need to be removed or updated
   // if the version is advanced beyond 13.
-  // FIXME: Starting from version 13, indexed profile header has forward
-  // compatibility. Older tools can parse header and finds the starting byte for
-  // payload when reading profiles of newer version. By then we can clean up
-  // WritePrevVersion.
+  // Starting from version 13, an indexed profile records the on-disk
+  // byte size of header at a fixed byte offset. Compilers or tools built
+  // starting from this version can read the on-disk byte size (as opposed to
+  // relying on struct definition of Header) to locate the start of payload
+  // sections.
+  // FIXME: Clean up WritePrevVersion later.
   assert(IndexedInstrProf::ProfVersion::CurrentVersion ==
          IndexedInstrProf::ProfVersion::Version13);
   if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))

>From 3ac8075b1f893e91f4a3bd5495f4494c87b752eb Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 9 Apr 2024 17:58:56 -0700
Subject: [PATCH 3/5] Fix the write and read of 'Size' field

---
 llvm/include/llvm/ProfileData/InstrProf.h |  8 +++---
 llvm/lib/ProfileData/InstrProf.cpp        | 31 ++++++++++++++---------
 llvm/lib/ProfileData/InstrProfWriter.cpp  | 10 +++++++-
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 8399a3340ccef9..48fd4aebcd1a6c 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -1157,7 +1157,6 @@ const HashT HashType = HashT::MD5;
 
 inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); }
 
-constexpr unsigned kHeaderFieldSize = 10;
 // This structure defines the file header of the LLVM profile
 // data file in indexed-format. Please update llvm/docs/InstrProfileFormat.rst
 // as appropriate when updating the indexed profile format.
@@ -1177,8 +1176,11 @@ struct Header {
   uint64_t BinaryIdOffset;
   uint64_t TemporalProfTracesOffset;
   uint64_t VTableNamesOffset;
-  // The on-disk byte size of the header.
-  uint64_t Size;
+
+  // The on-disk byte size of the header. As with other fields, do not read its
+  // value before calling readFromBuffer.
+  uint64_t Size = 0;
+
   // New fields should only be added at the end to ensure that the size
   // computation is correct. The methods below need to be updated to ensure that
   // the new field is read correctly.
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index c85520f523abcb..27eecce73439eb 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1610,13 +1610,17 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
 
   // Read the version.
   H.Version = read(Buffer, sizeof(uint64_t));
-  const uint64_t ProfVersion = GET_VERSION(H.formatVersion());
-  if (ProfVersion > IndexedInstrProf::ProfVersion::CurrentVersion)
+  const uint64_t ProfileVersion = GET_VERSION(H.formatVersion());
+  if (ProfileVersion > IndexedInstrProf::ProfVersion::CurrentVersion)
     return make_error<InstrProfError>(instrprof_error::unsupported_version);
 
-  size_t FieldByteOffset = H.size() - sizeof(uint64_t);
+  constexpr size_t kOnDiskSizeOffset = 9 * sizeof(uint64_t);
+  if (ProfileVersion >= ProfVersion::Version13)
+    H.Size = read(Buffer, kOnDiskSizeOffset);
 
-  switch (ProfVersion) {
+  size_t FieldByteOffset = H.size();
+
+  switch (ProfileVersion) {
     // When a new field is added in the header add a case statement here to
     // populate it.
     static_assert(
@@ -1624,31 +1628,32 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
         "Please update the reading code below if a new field has been added, "
         "if not add a case statement to fall through to the latest version.");
   case 13ull:
-    FieldByteOffset -= sizeof(uint64_t);
+    // Size field is already read.
+    FieldByteOffset -= sizeof(Header::Size);
     [[fallthrough]];
   case 12ull:
+    FieldByteOffset -= sizeof(Header::VTableNamesOffset);
     H.VTableNamesOffset = read(Buffer, FieldByteOffset);
-    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 11ull:
     [[fallthrough]];
   case 10ull:
+    FieldByteOffset -= sizeof(Header::TemporalProfTracesOffset);
     H.TemporalProfTracesOffset = read(Buffer, FieldByteOffset);
-    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 9ull:
+    FieldByteOffset -= sizeof(Header::BinaryIdOffset);
     H.BinaryIdOffset = read(Buffer, FieldByteOffset);
-    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 8ull:
+    FieldByteOffset -= sizeof(Header::MemProfOffset);
     H.MemProfOffset = read(Buffer, FieldByteOffset);
-    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   default: // Version7 (when the backwards compatible header was introduced).
+    FieldByteOffset -= sizeof(Header::HashOffset);
     H.HashOffset = read(Buffer, FieldByteOffset);
-    FieldByteOffset -= sizeof(uint64_t);
+    FieldByteOffset -= sizeof(Header::HashType);
     H.HashType = read(Buffer, FieldByteOffset);
-    FieldByteOffset -= sizeof(uint64_t);
   }
 
   return H;
@@ -1664,7 +1669,9 @@ size_t Header::size() const {
                   "been added to the header, if not add a case statement to "
                   "fall through to the latest version.");
   case 13ull:
-    return sizeof(uint64_t) * kHeaderFieldSize;
+    assert(Size != 0 && "User can call Header::size() only after reading it "
+                        "from memory buffer");
+    return Size;
   case 12ull:
     return offsetOf(&Header::VTableNamesOffset) +
            sizeof(Header::VTableNamesOffset);
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 837655656f9d4d..df937226762744 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -642,8 +642,15 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   uint64_t VTableNamesOffset = OS.tell();
   OS.write(0);
 
+  // Record the offset of 'Header::Size' field and reserve the space to allow
+  // back patching later.
+  const uint64_t OnDiskHeaderSizeOffset = OS.tell();
   if (!WritePrevVersion)
-    OS.write(OS.tell() - StartOffset);
+    OS.write(0);
+
+  // Record the OnDiskHeader Size after each header field either gets written or
+  // gets its space reserved.
+  uint64_t OnDiskHeaderSize = OS.tell() - StartOffset;
 
   // Reserve space to write profile summary data.
   uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
@@ -784,6 +791,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
         // traces).
         {TemporalProfTracesOffset, &TemporalProfTracesSectionStart, 1},
         {VTableNamesOffset, &VTableNamesSectionStart, 1},
+        {OnDiskHeaderSizeOffset, &OnDiskHeaderSize, 1},
         // Patch the summary data.
         {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
          (int)(SummarySize / sizeof(uint64_t))},

>From 34b0136976691bdaa7cb88bef233f088e3efa2b5 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 10 Apr 2024 08:28:16 -0700
Subject: [PATCH 4/5] Update FieldByteOffset to be the last known header field
 that's understandable by the binary

---
 llvm/lib/ProfileData/InstrProf.cpp | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 27eecce73439eb..6b4ca0f86ecb5e 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1615,10 +1615,22 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
     return make_error<InstrProfError>(instrprof_error::unsupported_version);
 
   constexpr size_t kOnDiskSizeOffset = 9 * sizeof(uint64_t);
+  // `FieldByteOffset` represents the end byte of the last known header field.
+  size_t FieldByteOffset = 0;
   if (ProfileVersion >= ProfVersion::Version13)
     H.Size = read(Buffer, kOnDiskSizeOffset);
 
-  size_t FieldByteOffset = H.size();
+  if (ProfileVersion < ProfVersion::Version13)
+    FieldByteOffset = H.size();
+  else {
+    assert(ProfileVersion == ProfVersion::Version13 &&
+           "The newest known version is version 13");
+    FieldByteOffset = kOnDiskSizeOffset + sizeof(Header::Size);
+  }
+
+  assert(FieldByteOffset != 0 &&
+         "FieldByteOffset specifies the byte offset of the last known field in "
+         "header and should not be zero");
 
   switch (ProfileVersion) {
     // When a new field is added in the header add a case statement here to

>From 7cd93b1ec270e8d8d3c4f310a72f3f9833690604 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 11 Apr 2024 12:18:01 -0700
Subject: [PATCH 5/5] Allow unknown future versions in profile reader.

- Before this, profile reader check and rejects unknown future versions.
  This prevents unknown errors when the semantics change in an existing
  payload section.
- After this change, we want to allow profile reader to skip unknown new
  sections but still prevent unknown errors upon semantic changes.
- One middle ground is to record the compatible versions in the
  profiles. Profile readers can read back the compatible versions and
  rejects a profile if the intersection between reader-supported version
  and profile-compatible version is empty. Still working on finalizing
  the solution.
---
 llvm/include/llvm/ProfileData/InstrProf.h     | 10 ++-
 llvm/lib/ProfileData/InstrProf.cpp            | 78 +++++++++++++------
 llvm/lib/ProfileData/InstrProfWriter.cpp      | 13 ++++
 .../llvm-profdata/vtable-value-prof.test      |  2 +-
 4 files changed, 75 insertions(+), 28 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 48fd4aebcd1a6c..dff3424c52c87d 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -1188,12 +1188,14 @@ struct Header {
   // Reads a header struct from the buffer.
   static Expected<Header> readFromBuffer(const unsigned char *Buffer);
 
-  // Returns the size of the header in bytes for all valid fields based on the
-  // version. I.e a older version header will return a smaller size.
+  // Returns the on-disk byte size of the header for all valid fields based on
+  // the version.
   size_t size() const;
 
-  // Returns the format version in little endian. The header retains the version
-  // in native endian of the compiler runtime.
+  Expected<size_t> knownFieldsEndByteOffset() const;
+
+  // Returns the format version in little endian. The header retains the
+  // version in native endian of the compiler runtime.
   uint64_t formatVersion() const;
 };
 
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 6b4ca0f86ecb5e..a9af6b2484a7e4 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1595,6 +1595,8 @@ uint64_t Header::formatVersion() const {
   return endian::byte_swap<uint64_t, llvm::endianness::little>(Version);
 }
 
+constexpr size_t kOnDiskSizeOffset = 9 * sizeof(uint64_t);
+
 Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
   using namespace support;
   static_assert(std::is_standard_layout_v<Header>,
@@ -1610,28 +1612,36 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
 
   // Read the version.
   H.Version = read(Buffer, sizeof(uint64_t));
-  const uint64_t ProfileVersion = GET_VERSION(H.formatVersion());
-  if (ProfileVersion > IndexedInstrProf::ProfVersion::CurrentVersion)
-    return make_error<InstrProfError>(instrprof_error::unsupported_version);
+  uint64_t ProfileVersion = GET_VERSION(H.formatVersion());
+
+  if (ProfileVersion > IndexedInstrProf::ProfVersion::CurrentVersion) {
+    if (CurrentVersion < ProfVersion::Version13)
+      return make_error<InstrProfError>(instrprof_error::unsupported_version);
+
+    // FIXME: When the semantic of an existing payload section changes, allowing
+    // profile reader to interpret new profiles using the old code in the
+    // current way is too error-prone.
+    // Should the payload section record its own version and fail the profile
+    // reader to require a re-build of the profile reader in this case?
+  }
 
-  constexpr size_t kOnDiskSizeOffset = 9 * sizeof(uint64_t);
-  // `FieldByteOffset` represents the end byte of the last known header field.
-  size_t FieldByteOffset = 0;
   if (ProfileVersion >= ProfVersion::Version13)
     H.Size = read(Buffer, kOnDiskSizeOffset);
 
-  if (ProfileVersion < ProfVersion::Version13)
-    FieldByteOffset = H.size();
-  else {
-    assert(ProfileVersion == ProfVersion::Version13 &&
-           "The newest known version is version 13");
-    FieldByteOffset = kOnDiskSizeOffset + sizeof(Header::Size);
-  }
+  // `FieldByteOffset` represents the end byte of the last known header field.
+  auto FieldByteOffsetOrErr = H.knownFieldsEndByteOffset();
+  if (FieldByteOffsetOrErr)
 
   assert(FieldByteOffset != 0 &&
          "FieldByteOffset specifies the byte offset of the last known field in "
          "header and should not be zero");
 
+  // If the version from profile is higher than the currently supported version,
+  // read the known defined header fields and discard the rest.
+  if (ProfileVersion > ProfVersion::CurrentVersion)
+    ProfileVersion = ProfVersion::CurrentVersion;
+
+  // Initialize header fields based on the currently supported version.
   switch (ProfileVersion) {
     // When a new field is added in the header add a case statement here to
     // populate it.
@@ -1671,19 +1681,41 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
   return H;
 }
 
-size_t Header::size() const {
-  switch (GET_VERSION(formatVersion())) {
-    // When a new field is added to the header add a case statement here to
-    // compute the size as offset of the new field + size of the new field. This
-    // relies on the field being added to the end of the list.
-    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
-                  "Please update the size computation below if a new field has "
-                  "been added to the header, if not add a case statement to "
-                  "fall through to the latest version.");
+Expected<size_t> Header::knownFieldsEndByteOffset() const {
+  const uint64_t ProfileVersion = GET_VERSION(formatVersion());
+  if (ProfileVersion <= ProfVersion::Version12)
+    return this->size();
+
+  // Starting from version 13, the known field end byte offset is inferred based
+  // on the currently supported version.
+  switch (IndexedInstrProf::ProfVersion::CurrentVersion) {
+    // When a new field is added in the header add a case statement here to
+    // populate it.
+    static_assert(
+        IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
+        "Please update the reading code below if a new field has been added, "
+        "if not add a case statement to fall through to the latest version.");
   case 13ull:
+    return kOnDiskSizeOffset + sizeof(Header::Size);
+  default:
+    break;
+  }
+
+  return make_error<InstrProfError>(instrprof_error::unsupported_version);
+}
+
+size_t Header::size() const {
+  const uint64_t ProfileVersion = GET_VERSION(formatVersion());
+  // Starting from version 13, the indexed profile records the byte size of
+  // header.
+  if (ProfileVersion >= ProfVersion::Version13) {
     assert(Size != 0 && "User can call Header::size() only after reading it "
-                        "from memory buffer");
+                        "from readMemoryBuffer");
     return Size;
+  }
+  switch (ProfileVersion) {
+    assert(ProfileVersion <= ProfVersion::Version12 &&
+           "Do not infer header size field for newer version");
   case 12ull:
     return offsetOf(&Header::VTableNamesOffset) +
            sizeof(Header::VTableNamesOffset);
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index df937226762744..c636ee624f3af4 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -36,6 +36,9 @@
 
 using namespace llvm;
 
+static cl::opt<bool> WriteFutureVersion("write-future-version",
+                                        cl::init(false));
+
 // A struct to define how the data stream should be patched. For Indexed
 // profiling, only uint64_t data type is needed.
 struct PatchItem {
@@ -578,6 +581,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   Header.Version = WritePrevVersion
                        ? IndexedInstrProf::ProfVersion::Version12
                        : IndexedInstrProf::ProfVersion::CurrentVersion;
+  if (WriteFutureVersion)
+    Header.Version = 14;
   // The WritePrevVersion handling will either need to be removed or updated
   // if the version is advanced beyond 13.
   // Starting from version 13, an indexed profile records the on-disk
@@ -648,10 +653,18 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   if (!WritePrevVersion)
     OS.write(0);
 
+  // Write a dummy value
+  if (WriteFutureVersion)
+    OS.write(0);
+
   // Record the OnDiskHeader Size after each header field either gets written or
   // gets its space reserved.
   uint64_t OnDiskHeaderSize = OS.tell() - StartOffset;
 
+  llvm::errs() << "OnDiskHeaderSize is " << OnDiskHeaderSize << "\n";
+  fflush(stdout);
+  fflush(stderr);
+
   // Reserve space to write profile summary data.
   uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
   uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
diff --git a/llvm/test/tools/llvm-profdata/vtable-value-prof.test b/llvm/test/tools/llvm-profdata/vtable-value-prof.test
index 378c2e11b236ba..8c8dfb975ab67c 100644
--- a/llvm/test/tools/llvm-profdata/vtable-value-prof.test
+++ b/llvm/test/tools/llvm-profdata/vtable-value-prof.test
@@ -1,7 +1,7 @@
 ; RUN: rm -rf %t && mkdir %t && cd %t
 
 ; Generate indexed profiles from text profiles
-RUN: llvm-profdata merge %S/Inputs/vtable-value-prof.proftext -o indexed.profdata
+RUN: llvm-profdata merge -write-future-version %S/Inputs/vtable-value-prof.proftext -o indexed.profdata
 
 ; Show indexed profiles
 RUN: llvm-profdata show --function=main --ic-targets --show-vtables indexed.profdata | FileCheck %s --check-prefix=INDEXED



More information about the llvm-commits mailing list