[compiler-rt] [llvm] [InstrFDO]Allow indexed profile reader to parse compatible future versions and returns errors for incompatible ones. (PR #88212)

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 28 16:11:00 PDT 2024


https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/88212

>From ee8390560bb48c95a9dddf63fb9313ffe99b648b Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 9 Apr 2024 16:45:30 -0700
Subject: [PATCH 1/7] [InstrFDO]Record the on-disk header size in indexed
 profile header

---
 compiler-rt/include/profile/InstrProfData.inc |  2 +-
 llvm/include/llvm/ProfileData/InstrProf.h     |  6 +-
 .../llvm/ProfileData/InstrProfData.inc        |  2 +-
 llvm/lib/ProfileData/InstrProf.cpp            | 46 ++++++++------
 llvm/lib/ProfileData/InstrProfReader.cpp      |  2 +-
 llvm/lib/ProfileData/InstrProfWriter.cpp      | 60 ++++++++++---------
 .../tools/llvm-profdata/profile-version.test  |  4 +-
 7 files changed, 72 insertions(+), 50 deletions(-)

diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index e9866d94b762c1..36fb7fe989c391 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -710,7 +710,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 /* Raw profile format version (start from 1). */
 #define INSTR_PROF_RAW_VERSION 10
 /* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 12
+#define INSTR_PROF_INDEX_VERSION 13
 /* Coverage mapping format version (start from 0). */
 #define INSTR_PROF_COVMAP_VERSION 6
 
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index eb3c10bcba1ca7..7072a20e083db5 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -1146,7 +1146,9 @@ enum ProfVersion {
   Version11 = 11,
   // VTable profiling,
   Version12 = 12,
-  // The current version is 12.
+  // Record the on-disk byte size of header.
+  Version13 = 13,
+  // The current version is 13.
   CurrentVersion = INSTR_PROF_INDEX_VERSION
 };
 const uint64_t Version = ProfVersion::CurrentVersion;
@@ -1174,6 +1176,8 @@ struct Header {
   uint64_t BinaryIdOffset;
   uint64_t TemporalProfTracesOffset;
   uint64_t VTableNamesOffset;
+  // The byte size of on-disk header.
+  uint64_t Size;
   // New fields should only be added at the end to ensure that the size
   // computation is correct. The methods below need to be updated to ensure that
   // the new field is read correctly.
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index e9866d94b762c1..36fb7fe989c391 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -710,7 +710,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 /* Raw profile format version (start from 1). */
 #define INSTR_PROF_RAW_VERSION 10
 /* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 12
+#define INSTR_PROF_INDEX_VERSION 13
 /* Coverage mapping format version (start from 0). */
 #define INSTR_PROF_COVMAP_VERSION 6
 
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 95f900d0fff1ca..200ae377e81abc 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1598,11 +1598,11 @@ uint64_t Header::formatVersion() const {
 Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
   using namespace support;
   static_assert(std::is_standard_layout_v<Header>,
-                "The header should be standard layout type since we use offset "
-                "of fields to read.");
-  Header H;
+                "Keep the header a standard-layout class for simplicity");
 
-  H.Magic = read(Buffer, offsetOf(&Header::Magic));
+  Header H;
+  size_t BufferOffset = 0;
+  H.Magic = read(Buffer, BufferOffset);
   // Check the magic number.
   uint64_t Magic =
       endian::byte_swap<uint64_t, llvm::endianness::little>(H.Magic);
@@ -1610,36 +1610,46 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
     return make_error<InstrProfError>(instrprof_error::bad_magic);
 
   // Read the version.
-  H.Version = read(Buffer, offsetOf(&Header::Version));
-  if (GET_VERSION(H.formatVersion()) >
-      IndexedInstrProf::ProfVersion::CurrentVersion)
+  H.Version = read(Buffer, BufferOffset + sizeof(uint64_t));
+  const uint64_t ProfVersion = GET_VERSION(H.formatVersion());
+  if (ProfVersion > IndexedInstrProf::ProfVersion::CurrentVersion)
     return make_error<InstrProfError>(instrprof_error::unsupported_version);
 
-  switch (GET_VERSION(H.formatVersion())) {
+  size_t FieldByteOffset = H.size() - sizeof(uint64_t);
+
+  switch (ProfVersion) {
     // When a new field is added in the header add a case statement here to
     // populate it.
     static_assert(
-        IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
+        IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
         "Please update the reading code below if a new field has been added, "
         "if not add a case statement to fall through to the latest version.");
+  case 13ull:
+    FieldByteOffset -= sizeof(uint64_t);
+    [[fallthrough]];
   case 12ull:
-    H.VTableNamesOffset = read(Buffer, offsetOf(&Header::VTableNamesOffset));
+    H.VTableNamesOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 11ull:
     [[fallthrough]];
   case 10ull:
-    H.TemporalProfTracesOffset =
-        read(Buffer, offsetOf(&Header::TemporalProfTracesOffset));
+    H.TemporalProfTracesOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 9ull:
-    H.BinaryIdOffset = read(Buffer, offsetOf(&Header::BinaryIdOffset));
+    H.BinaryIdOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 8ull:
-    H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset));
+    H.MemProfOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   default: // Version7 (when the backwards compatible header was introduced).
-    H.HashType = read(Buffer, offsetOf(&Header::HashType));
-    H.HashOffset = read(Buffer, offsetOf(&Header::HashOffset));
+    H.HashOffset = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
+    H.HashType = read(Buffer, FieldByteOffset);
+    FieldByteOffset -= sizeof(uint64_t);
   }
 
   return H;
@@ -1650,10 +1660,12 @@ size_t Header::size() const {
     // When a new field is added to the header add a case statement here to
     // compute the size as offset of the new field + size of the new field. This
     // relies on the field being added to the end of the list.
-    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version12,
+    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
                   "Please update the size computation below if a new field has "
                   "been added to the header, if not add a case statement to "
                   "fall through to the latest version.");
+  case 13ull:
+    return sizeof(uint64_t) * 10;
   case 12ull:
     return offsetOf(&Header::VTableNamesOffset) +
            sizeof(Header::VTableNamesOffset);
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 884334ed070e8d..fef71f309ddd30 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1215,7 +1215,7 @@ Error IndexedInstrProfReader::readHeader() {
   if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
     return error(instrprof_error::truncated);
 
-  auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Start);
+  auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Cur);
   if (!HeaderOr)
     return HeaderOr.takeError();
 
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 7c56cde3e6cedd..0c13b734746fdc 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -576,12 +576,16 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   IndexedInstrProf::Header Header;
   Header.Magic = IndexedInstrProf::Magic;
   Header.Version = WritePrevVersion
-                       ? IndexedInstrProf::ProfVersion::Version11
+                       ? IndexedInstrProf::ProfVersion::Version12
                        : IndexedInstrProf::ProfVersion::CurrentVersion;
   // The WritePrevVersion handling will either need to be removed or updated
-  // if the version is advanced beyond 12.
+  // if the version is advanced beyond 13.
+  // FIXME: Starting from version 13, indexed profile header has forward
+  // compatibility. Older tools can parse header and finds the starting byte for
+  // payload when reading profiles of newer version. By then we can clean up
+  // WritePrevVersion.
   assert(IndexedInstrProf::ProfVersion::CurrentVersion ==
-         IndexedInstrProf::ProfVersion::Version12);
+         IndexedInstrProf::ProfVersion::Version13);
   if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
     Header.Version |= VARIANT_MASK_IR_PROF;
   if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive))
@@ -606,6 +610,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   Header.TemporalProfTracesOffset = 0;
   Header.VTableNamesOffset = 0;
 
+  const uint64_t StartOffset = OS.tell();
   // Only write out the first four fields. We need to remember the offset of the
   // remaining fields to allow back patching later.
   for (int I = 0; I < 4; I++)
@@ -633,8 +638,10 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   OS.write(0);
 
   uint64_t VTableNamesOffset = OS.tell();
+  OS.write(0);
+
   if (!WritePrevVersion)
-    OS.write(0);
+    OS.write(OS.tell() - StartOffset);
 
   // Reserve space to write profile summary data.
   uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
@@ -703,34 +710,32 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
 
   uint64_t VTableNamesSectionStart = OS.tell();
 
-  if (!WritePrevVersion) {
-    std::vector<std::string> VTableNameStrs;
-    for (StringRef VTableName : VTableNames.keys())
-      VTableNameStrs.push_back(VTableName.str());
-
-    std::string CompressedVTableNames;
-    if (!VTableNameStrs.empty())
-      if (Error E = collectGlobalObjectNameStrings(
-              VTableNameStrs, compression::zlib::isAvailable(),
-              CompressedVTableNames))
-        return E;
+  std::vector<std::string> VTableNameStrs;
+  for (StringRef VTableName : VTableNames.keys())
+    VTableNameStrs.push_back(VTableName.str());
 
-    const uint64_t CompressedStringLen = CompressedVTableNames.length();
+  std::string CompressedVTableNames;
+  if (!VTableNameStrs.empty())
+    if (Error E = collectGlobalObjectNameStrings(
+            VTableNameStrs, compression::zlib::isAvailable(),
+            CompressedVTableNames))
+      return E;
 
-    // Record the length of compressed string.
-    OS.write(CompressedStringLen);
+  const uint64_t CompressedStringLen = CompressedVTableNames.length();
 
-    // Write the chars in compressed strings.
-    for (auto &c : CompressedVTableNames)
-      OS.writeByte(static_cast<uint8_t>(c));
+  // Record the length of compressed string.
+  OS.write(CompressedStringLen);
 
-    // Pad up to a multiple of 8.
-    // InstrProfReader could read bytes according to 'CompressedStringLen'.
-    const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);
+  // Write the chars in compressed strings.
+  for (auto &c : CompressedVTableNames)
+    OS.writeByte(static_cast<uint8_t>(c));
 
-    for (uint64_t K = CompressedStringLen; K < PaddedLength; K++)
-      OS.writeByte(0);
-  }
+  // Pad up to a multiple of 8.
+  // InstrProfReader could read bytes according to 'CompressedStringLen'.
+  const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);
+
+  for (uint64_t K = CompressedStringLen; K < PaddedLength; K++)
+    OS.writeByte(0);
 
   uint64_t TemporalProfTracesSectionStart = 0;
   if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {
@@ -797,6 +802,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
         // Patch the Header.TemporalProfTracesOffset (=0 for profiles without
         // traces).
         {TemporalProfTracesOffset, &TemporalProfTracesSectionStart, 1},
+        {VTableNamesOffset, &VTableNamesSectionStart, 1},
         // Patch the summary data.
         {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
          (int)(SummarySize / sizeof(uint64_t))},
diff --git a/llvm/test/tools/llvm-profdata/profile-version.test b/llvm/test/tools/llvm-profdata/profile-version.test
index cb68a648d5e5aa..b5405cdec46248 100644
--- a/llvm/test/tools/llvm-profdata/profile-version.test
+++ b/llvm/test/tools/llvm-profdata/profile-version.test
@@ -2,8 +2,8 @@ Test the profile version.
 
 RUN: llvm-profdata merge -o %t.profdata %p/Inputs/basic.proftext
 RUN: llvm-profdata show --profile-version %t.profdata | FileCheck %s
-CHECK: Profile version: 12
+CHECK: Profile version: 13
 
 RUN: llvm-profdata merge -o %t.prev.profdata %p/Inputs/basic.proftext --write-prev-version
 RUN: llvm-profdata show --profile-version %t.prev.profdata | FileCheck %s --check-prefix=PREV
-PREV: Profile version: 11
+PREV: Profile version: 12

>From a948d2890c0e198ba77d1c55e8c75703763e774f Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 9 Apr 2024 17:01:47 -0700
Subject: [PATCH 2/7] polish

---
 llvm/include/llvm/ProfileData/InstrProf.h |  3 ++-
 llvm/lib/ProfileData/InstrProf.cpp        |  7 +++----
 llvm/lib/ProfileData/InstrProfReader.cpp  |  2 +-
 llvm/lib/ProfileData/InstrProfWriter.cpp  | 10 ++++++----
 4 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 7072a20e083db5..8399a3340ccef9 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -1157,6 +1157,7 @@ const HashT HashType = HashT::MD5;
 
 inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); }
 
+constexpr unsigned kHeaderFieldSize = 10;
 // This structure defines the file header of the LLVM profile
 // data file in indexed-format. Please update llvm/docs/InstrProfileFormat.rst
 // as appropriate when updating the indexed profile format.
@@ -1176,7 +1177,7 @@ struct Header {
   uint64_t BinaryIdOffset;
   uint64_t TemporalProfTracesOffset;
   uint64_t VTableNamesOffset;
-  // The byte size of on-disk header.
+  // The on-disk byte size of the header.
   uint64_t Size;
   // New fields should only be added at the end to ensure that the size
   // computation is correct. The methods below need to be updated to ensure that
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 200ae377e81abc..c85520f523abcb 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1601,8 +1601,7 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
                 "Keep the header a standard-layout class for simplicity");
 
   Header H;
-  size_t BufferOffset = 0;
-  H.Magic = read(Buffer, BufferOffset);
+  H.Magic = read(Buffer, 0);
   // Check the magic number.
   uint64_t Magic =
       endian::byte_swap<uint64_t, llvm::endianness::little>(H.Magic);
@@ -1610,7 +1609,7 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
     return make_error<InstrProfError>(instrprof_error::bad_magic);
 
   // Read the version.
-  H.Version = read(Buffer, BufferOffset + sizeof(uint64_t));
+  H.Version = read(Buffer, sizeof(uint64_t));
   const uint64_t ProfVersion = GET_VERSION(H.formatVersion());
   if (ProfVersion > IndexedInstrProf::ProfVersion::CurrentVersion)
     return make_error<InstrProfError>(instrprof_error::unsupported_version);
@@ -1665,7 +1664,7 @@ size_t Header::size() const {
                   "been added to the header, if not add a case statement to "
                   "fall through to the latest version.");
   case 13ull:
-    return sizeof(uint64_t) * 10;
+    return sizeof(uint64_t) * kHeaderFieldSize;
   case 12ull:
     return offsetOf(&Header::VTableNamesOffset) +
            sizeof(Header::VTableNamesOffset);
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index fef71f309ddd30..884334ed070e8d 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1215,7 +1215,7 @@ Error IndexedInstrProfReader::readHeader() {
   if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
     return error(instrprof_error::truncated);
 
-  auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Cur);
+  auto HeaderOr = IndexedInstrProf::Header::readFromBuffer(Start);
   if (!HeaderOr)
     return HeaderOr.takeError();
 
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 0c13b734746fdc..837655656f9d4d 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -580,10 +580,12 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
                        : IndexedInstrProf::ProfVersion::CurrentVersion;
   // The WritePrevVersion handling will either need to be removed or updated
   // if the version is advanced beyond 13.
-  // FIXME: Starting from version 13, indexed profile header has forward
-  // compatibility. Older tools can parse header and finds the starting byte for
-  // payload when reading profiles of newer version. By then we can clean up
-  // WritePrevVersion.
+  // Starting from version 13, an indexed profile records the on-disk
+  // byte size of header at a fixed byte offset. Compilers or tools built
+  // starting from this version can read the on-disk byte size (as opposed to
+  // relying on struct definition of Header) to locate the start of payload
+  // sections.
+  // FIXME: Clean up WritePrevVersion later.
   assert(IndexedInstrProf::ProfVersion::CurrentVersion ==
          IndexedInstrProf::ProfVersion::Version13);
   if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))

>From 3ac8075b1f893e91f4a3bd5495f4494c87b752eb Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 9 Apr 2024 17:58:56 -0700
Subject: [PATCH 3/7] Fix the write and read of 'Size' field

---
 llvm/include/llvm/ProfileData/InstrProf.h |  8 +++---
 llvm/lib/ProfileData/InstrProf.cpp        | 31 ++++++++++++++---------
 llvm/lib/ProfileData/InstrProfWriter.cpp  | 10 +++++++-
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 8399a3340ccef9..48fd4aebcd1a6c 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -1157,7 +1157,6 @@ const HashT HashType = HashT::MD5;
 
 inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); }
 
-constexpr unsigned kHeaderFieldSize = 10;
 // This structure defines the file header of the LLVM profile
 // data file in indexed-format. Please update llvm/docs/InstrProfileFormat.rst
 // as appropriate when updating the indexed profile format.
@@ -1177,8 +1176,11 @@ struct Header {
   uint64_t BinaryIdOffset;
   uint64_t TemporalProfTracesOffset;
   uint64_t VTableNamesOffset;
-  // The on-disk byte size of the header.
-  uint64_t Size;
+
+  // The on-disk byte size of the header. As with other fields, do not read its
+  // value before calling readFromBuffer.
+  uint64_t Size = 0;
+
   // New fields should only be added at the end to ensure that the size
   // computation is correct. The methods below need to be updated to ensure that
   // the new field is read correctly.
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index c85520f523abcb..27eecce73439eb 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1610,13 +1610,17 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
 
   // Read the version.
   H.Version = read(Buffer, sizeof(uint64_t));
-  const uint64_t ProfVersion = GET_VERSION(H.formatVersion());
-  if (ProfVersion > IndexedInstrProf::ProfVersion::CurrentVersion)
+  const uint64_t ProfileVersion = GET_VERSION(H.formatVersion());
+  if (ProfileVersion > IndexedInstrProf::ProfVersion::CurrentVersion)
     return make_error<InstrProfError>(instrprof_error::unsupported_version);
 
-  size_t FieldByteOffset = H.size() - sizeof(uint64_t);
+  constexpr size_t kOnDiskSizeOffset = 9 * sizeof(uint64_t);
+  if (ProfileVersion >= ProfVersion::Version13)
+    H.Size = read(Buffer, kOnDiskSizeOffset);
 
-  switch (ProfVersion) {
+  size_t FieldByteOffset = H.size();
+
+  switch (ProfileVersion) {
     // When a new field is added in the header add a case statement here to
     // populate it.
     static_assert(
@@ -1624,31 +1628,32 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
         "Please update the reading code below if a new field has been added, "
         "if not add a case statement to fall through to the latest version.");
   case 13ull:
-    FieldByteOffset -= sizeof(uint64_t);
+    // Size field is already read.
+    FieldByteOffset -= sizeof(Header::Size);
     [[fallthrough]];
   case 12ull:
+    FieldByteOffset -= sizeof(Header::VTableNamesOffset);
     H.VTableNamesOffset = read(Buffer, FieldByteOffset);
-    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 11ull:
     [[fallthrough]];
   case 10ull:
+    FieldByteOffset -= sizeof(Header::TemporalProfTracesOffset);
     H.TemporalProfTracesOffset = read(Buffer, FieldByteOffset);
-    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 9ull:
+    FieldByteOffset -= sizeof(Header::BinaryIdOffset);
     H.BinaryIdOffset = read(Buffer, FieldByteOffset);
-    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   case 8ull:
+    FieldByteOffset -= sizeof(Header::MemProfOffset);
     H.MemProfOffset = read(Buffer, FieldByteOffset);
-    FieldByteOffset -= sizeof(uint64_t);
     [[fallthrough]];
   default: // Version7 (when the backwards compatible header was introduced).
+    FieldByteOffset -= sizeof(Header::HashOffset);
     H.HashOffset = read(Buffer, FieldByteOffset);
-    FieldByteOffset -= sizeof(uint64_t);
+    FieldByteOffset -= sizeof(Header::HashType);
     H.HashType = read(Buffer, FieldByteOffset);
-    FieldByteOffset -= sizeof(uint64_t);
   }
 
   return H;
@@ -1664,7 +1669,9 @@ size_t Header::size() const {
                   "been added to the header, if not add a case statement to "
                   "fall through to the latest version.");
   case 13ull:
-    return sizeof(uint64_t) * kHeaderFieldSize;
+    assert(Size != 0 && "User can call Header::size() only after reading it "
+                        "from memory buffer");
+    return Size;
   case 12ull:
     return offsetOf(&Header::VTableNamesOffset) +
            sizeof(Header::VTableNamesOffset);
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 837655656f9d4d..df937226762744 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -642,8 +642,15 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   uint64_t VTableNamesOffset = OS.tell();
   OS.write(0);
 
+  // Record the offset of 'Header::Size' field and reserve the space to allow
+  // back patching later.
+  const uint64_t OnDiskHeaderSizeOffset = OS.tell();
   if (!WritePrevVersion)
-    OS.write(OS.tell() - StartOffset);
+    OS.write(0);
+
+  // Record the OnDiskHeader Size after each header field either gets written or
+  // gets its space reserved.
+  uint64_t OnDiskHeaderSize = OS.tell() - StartOffset;
 
   // Reserve space to write profile summary data.
   uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
@@ -784,6 +791,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
         // traces).
         {TemporalProfTracesOffset, &TemporalProfTracesSectionStart, 1},
         {VTableNamesOffset, &VTableNamesSectionStart, 1},
+        {OnDiskHeaderSizeOffset, &OnDiskHeaderSize, 1},
         // Patch the summary data.
         {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
          (int)(SummarySize / sizeof(uint64_t))},

>From 34b0136976691bdaa7cb88bef233f088e3efa2b5 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 10 Apr 2024 08:28:16 -0700
Subject: [PATCH 4/7] Update FieldByteOffset to be the last known header field
 that's understandable by the binary

---
 llvm/lib/ProfileData/InstrProf.cpp | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 27eecce73439eb..6b4ca0f86ecb5e 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1615,10 +1615,22 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
     return make_error<InstrProfError>(instrprof_error::unsupported_version);
 
   constexpr size_t kOnDiskSizeOffset = 9 * sizeof(uint64_t);
+  // `FieldByteOffset` represents the end byte of the last known header field.
+  size_t FieldByteOffset = 0;
   if (ProfileVersion >= ProfVersion::Version13)
     H.Size = read(Buffer, kOnDiskSizeOffset);
 
-  size_t FieldByteOffset = H.size();
+  if (ProfileVersion < ProfVersion::Version13)
+    FieldByteOffset = H.size();
+  else {
+    assert(ProfileVersion == ProfVersion::Version13 &&
+           "The newest known version is version 13");
+    FieldByteOffset = kOnDiskSizeOffset + sizeof(Header::Size);
+  }
+
+  assert(FieldByteOffset != 0 &&
+         "FieldByteOffset specifies the byte offset of the last known field in "
+         "header and should not be zero");
 
   switch (ProfileVersion) {
     // When a new field is added in the header add a case statement here to

>From 7cd93b1ec270e8d8d3c4f310a72f3f9833690604 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 11 Apr 2024 12:18:01 -0700
Subject: [PATCH 5/7] Allow unknown future versions in profile reader.

- Before this, profile reader check and rejects unknown future versions.
  This prevents unknown errors when the semantics change in an existing
  payload section.
- After this change, we want to allow profile reader to skip unknown new
  sections but still prevent unknown errors upon semantic changes.
- One middle ground is to record the compatible versions in the
  profiles. Profile readers can read back the compatible versions and
  rejects a profile if the intersection between reader-supported version
  and profile-compatible version is empty. Still working on finalizing
  the solution.
---
 llvm/include/llvm/ProfileData/InstrProf.h     | 10 ++-
 llvm/lib/ProfileData/InstrProf.cpp            | 78 +++++++++++++------
 llvm/lib/ProfileData/InstrProfWriter.cpp      | 13 ++++
 .../llvm-profdata/vtable-value-prof.test      |  2 +-
 4 files changed, 75 insertions(+), 28 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 48fd4aebcd1a6c..dff3424c52c87d 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -1188,12 +1188,14 @@ struct Header {
   // Reads a header struct from the buffer.
   static Expected<Header> readFromBuffer(const unsigned char *Buffer);
 
-  // Returns the size of the header in bytes for all valid fields based on the
-  // version. I.e a older version header will return a smaller size.
+  // Returns the on-disk byte size of the header for all valid fields based on
+  // the version.
   size_t size() const;
 
-  // Returns the format version in little endian. The header retains the version
-  // in native endian of the compiler runtime.
+  Expected<size_t> knownFieldsEndByteOffset() const;
+
+  // Returns the format version in little endian. The header retains the
+  // version in native endian of the compiler runtime.
   uint64_t formatVersion() const;
 };
 
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 6b4ca0f86ecb5e..a9af6b2484a7e4 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1595,6 +1595,8 @@ uint64_t Header::formatVersion() const {
   return endian::byte_swap<uint64_t, llvm::endianness::little>(Version);
 }
 
+constexpr size_t kOnDiskSizeOffset = 9 * sizeof(uint64_t);
+
 Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
   using namespace support;
   static_assert(std::is_standard_layout_v<Header>,
@@ -1610,28 +1612,36 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
 
   // Read the version.
   H.Version = read(Buffer, sizeof(uint64_t));
-  const uint64_t ProfileVersion = GET_VERSION(H.formatVersion());
-  if (ProfileVersion > IndexedInstrProf::ProfVersion::CurrentVersion)
-    return make_error<InstrProfError>(instrprof_error::unsupported_version);
+  uint64_t ProfileVersion = GET_VERSION(H.formatVersion());
+
+  if (ProfileVersion > IndexedInstrProf::ProfVersion::CurrentVersion) {
+    if (CurrentVersion < ProfVersion::Version13)
+      return make_error<InstrProfError>(instrprof_error::unsupported_version);
+
+    // FIXME: When the semantic of an existing payload section changes, allowing
+    // profile reader to interpret new profiles using the old code in the
+    // current way is too error-prone.
+    // Should the payload section record its own version and fail the profile
+    // reader to require a re-build of the profile reader in this case?
+  }
 
-  constexpr size_t kOnDiskSizeOffset = 9 * sizeof(uint64_t);
-  // `FieldByteOffset` represents the end byte of the last known header field.
-  size_t FieldByteOffset = 0;
   if (ProfileVersion >= ProfVersion::Version13)
     H.Size = read(Buffer, kOnDiskSizeOffset);
 
-  if (ProfileVersion < ProfVersion::Version13)
-    FieldByteOffset = H.size();
-  else {
-    assert(ProfileVersion == ProfVersion::Version13 &&
-           "The newest known version is version 13");
-    FieldByteOffset = kOnDiskSizeOffset + sizeof(Header::Size);
-  }
+  // `FieldByteOffset` represents the end byte of the last known header field.
+  auto FieldByteOffsetOrErr = H.knownFieldsEndByteOffset();
+  if (FieldByteOffsetOrErr)
 
   assert(FieldByteOffset != 0 &&
          "FieldByteOffset specifies the byte offset of the last known field in "
          "header and should not be zero");
 
+  // If the version from profile is higher than the currently supported version,
+  // read the known defined header fields and discard the rest.
+  if (ProfileVersion > ProfVersion::CurrentVersion)
+    ProfileVersion = ProfVersion::CurrentVersion;
+
+  // Initialize header fields based on the currently supported version.
   switch (ProfileVersion) {
     // When a new field is added in the header add a case statement here to
     // populate it.
@@ -1671,19 +1681,41 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
   return H;
 }
 
-size_t Header::size() const {
-  switch (GET_VERSION(formatVersion())) {
-    // When a new field is added to the header add a case statement here to
-    // compute the size as offset of the new field + size of the new field. This
-    // relies on the field being added to the end of the list.
-    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
-                  "Please update the size computation below if a new field has "
-                  "been added to the header, if not add a case statement to "
-                  "fall through to the latest version.");
+Expected<size_t> Header::knownFieldsEndByteOffset() const {
+  const uint64_t ProfileVersion = GET_VERSION(formatVersion());
+  if (ProfileVersion <= ProfVersion::Version12)
+    return this->size();
+
+  // Starting from version 13, the known field end byte offset is inferred based
+  // on the currently supported version.
+  switch (IndexedInstrProf::ProfVersion::CurrentVersion) {
+    // When a new field is added in the header add a case statement here to
+    // populate it.
+    static_assert(
+        IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
+        "Please update the reading code below if a new field has been added, "
+        "if not add a case statement to fall through to the latest version.");
   case 13ull:
+    return kOnDiskSizeOffset + sizeof(Header::Size);
+  default:
+    break;
+  }
+
+  return make_error<InstrProfError>(instrprof_error::unsupported_version);
+}
+
+size_t Header::size() const {
+  const uint64_t ProfileVersion = GET_VERSION(formatVersion());
+  // Starting from version 13, the indexed profile records the byte size of
+  // header.
+  if (ProfileVersion >= ProfVersion::Version13) {
     assert(Size != 0 && "User can call Header::size() only after reading it "
-                        "from memory buffer");
+                        "from readMemoryBuffer");
     return Size;
+  }
+  switch (ProfileVersion) {
+    assert(ProfileVersion <= ProfVersion::Version12 &&
+           "Do not infer header size field for newer version");
   case 12ull:
     return offsetOf(&Header::VTableNamesOffset) +
            sizeof(Header::VTableNamesOffset);
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index df937226762744..c636ee624f3af4 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -36,6 +36,9 @@
 
 using namespace llvm;
 
+static cl::opt<bool> WriteFutureVersion("write-future-version",
+                                        cl::init(false));
+
 // A struct to define how the data stream should be patched. For Indexed
 // profiling, only uint64_t data type is needed.
 struct PatchItem {
@@ -578,6 +581,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   Header.Version = WritePrevVersion
                        ? IndexedInstrProf::ProfVersion::Version12
                        : IndexedInstrProf::ProfVersion::CurrentVersion;
+  if (WriteFutureVersion)
+    Header.Version = 14;
   // The WritePrevVersion handling will either need to be removed or updated
   // if the version is advanced beyond 13.
   // Starting from version 13, an indexed profile records the on-disk
@@ -648,10 +653,18 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   if (!WritePrevVersion)
     OS.write(0);
 
+  // Write a dummy value
+  if (WriteFutureVersion)
+    OS.write(0);
+
   // Record the OnDiskHeader Size after each header field either gets written or
   // gets its space reserved.
   uint64_t OnDiskHeaderSize = OS.tell() - StartOffset;
 
+  llvm::errs() << "OnDiskHeaderSize is " << OnDiskHeaderSize << "\n";
+  fflush(stdout);
+  fflush(stderr);
+
   // Reserve space to write profile summary data.
   uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
   uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
diff --git a/llvm/test/tools/llvm-profdata/vtable-value-prof.test b/llvm/test/tools/llvm-profdata/vtable-value-prof.test
index 378c2e11b236ba..8c8dfb975ab67c 100644
--- a/llvm/test/tools/llvm-profdata/vtable-value-prof.test
+++ b/llvm/test/tools/llvm-profdata/vtable-value-prof.test
@@ -1,7 +1,7 @@
 ; RUN: rm -rf %t && mkdir %t && cd %t
 
 ; Generate indexed profiles from text profiles
-RUN: llvm-profdata merge %S/Inputs/vtable-value-prof.proftext -o indexed.profdata
+RUN: llvm-profdata merge -write-future-version %S/Inputs/vtable-value-prof.proftext -o indexed.profdata
 
 ; Show indexed profiles
 RUN: llvm-profdata show --function=main --ic-targets --show-vtables indexed.profdata | FileCheck %s --check-prefix=INDEXED

>From ea7e84e60d924aa1f239f057f9e87c7295b8a033 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 24 Apr 2024 16:22:58 -0700
Subject: [PATCH 6/7] Add Header::MinimumProfileReaderVersion to detect
 incompatible changes

---
 llvm/include/llvm/ProfileData/InstrProf.h     | 31 ++++++-
 .../llvm/ProfileData/InstrProfWriter.h        | 31 ++++++-
 llvm/lib/ProfileData/InstrProf.cpp            | 90 ++++++++++++-------
 llvm/lib/ProfileData/InstrProfWriter.cpp      | 77 ++++++++++------
 .../tools/llvm-profdata/profile-version.test  |  1 +
 .../llvm-profdata/vtable-value-prof.test      |  2 +-
 llvm/unittests/ProfileData/InstrProfTest.cpp  | 55 ++++++++++++
 7 files changed, 219 insertions(+), 68 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index dff3424c52c87d..f534eeb682f812 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -368,6 +368,7 @@ enum class instrprof_error {
   zlib_unavailable,
   raw_profile_version_mismatch,
   counter_value_too_large,
+  unsupported_incompatible_future_version,
 };
 
 /// An ordered list of functions identified by their NameRef found in
@@ -1146,7 +1147,13 @@ enum ProfVersion {
   Version11 = 11,
   // VTable profiling,
   Version12 = 12,
-  // Record the on-disk byte size of header.
+  // Two additional header fields to add partial forward compatibility.
+  // Indexed profile reader compares the latest version it can parse with the
+  // minimum version required by (and recorded in) the profile; if the reader
+  // can reasonably interprets the payload, it proceeds to parse known sections
+  // and skip unknown sections; otherwise it stops reading and throws error
+  // (users should update compilers and/or command line tools to parse profiles
+  // with newer versions).
   Version13 = 13,
   // The current version is 13.
   CurrentVersion = INSTR_PROF_INDEX_VERSION
@@ -1177,9 +1184,24 @@ struct Header {
   uint64_t TemporalProfTracesOffset;
   uint64_t VTableNamesOffset;
 
-  // The on-disk byte size of the header. As with other fields, do not read its
-  // value before calling readFromBuffer.
-  uint64_t Size = 0;
+  // Records the on-disk byte size of the header.
+  uint64_t OnDiskByteSize = 0;
+
+  // Indexed profile writer will records the minimum profile reader version
+  // required to parse this profile. If a profile reader's supported version is
+  // smaller than what's recorded in this field, the profile reader (in
+  // compilers or command line tools) should be updated.
+  //
+  // Example scenario:
+  // The semantics of an existing section change starting from version V + 1
+  // (with readers supporting both versions to preserve backward compatibility),
+  // indexed profile writer should record `MinimumProfileReaderVersion` as V
+  // + 1. Previously-built profile readers won't know how to interpret the
+  // existing section correctly; these readers will find the
+  // `MinimumProfileReaderVersion` recorded in the profile is higher than the
+  // readers' supported version (a constant baked in the library), and stop the
+  // read process.
+  uint64_t MinimumProfileReaderVersion = 0;
 
   // New fields should only be added at the end to ensure that the size
   // computation is correct. The methods below need to be updated to ensure that
@@ -1192,6 +1214,7 @@ struct Header {
   // the version.
   size_t size() const;
 
+  // Returns the end byte offset of all known fields by the reader.
   Expected<size_t> knownFieldsEndByteOffset() const;
 
   // Returns the format version in little endian. The header retains the
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 4b42392bc1e061..8756710c7f7e84 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -40,6 +40,7 @@ class InstrProfWriter {
 
 private:
   bool Sparse;
+
   StringMap<ProfilingData> FunctionData;
   /// The maximum length of a single temporal profile trace.
   uint64_t MaxTemporalProfTraceLength;
@@ -81,6 +82,25 @@ class InstrProfWriter {
   // The MemProf version we should write.
   memprof::IndexedVersion MemProfVersionRequested;
 
+  // Returns the profile version in uint32_t.
+  // Header.Version is uint64_t with the lowest 32 bits specifying profile
+  // version and the most significant bits specyfing profile flavors.
+  uint32_t profileVersion() const;
+
+  // Returns the minimum profile reader version required to parse this profile.
+  uint64_t minProfileReaderVersion() const;
+
+  // The following fields are used in unit tests only.
+  // If not std::nullopt, this field overwrites the lowest 32 bits of
+  // Header::Version in the generated profile.
+  std::optional<uint32_t> ProfileVersion = std::nullopt;
+  // If true, profile writer will append one 64-bit dummy value as unknown
+  // header fields.
+  bool AppendAdditionalHeaderFields = false;
+  // If not std::nullopt, this field overwrites
+  // Header::MinimumProfileReaderVersion in the generated profile.
+  std::optional<int> MinCompatibleReaderProfileVersion = std::nullopt;
+
 public:
   InstrProfWriter(
       bool Sparse = false, uint64_t TemporalProfTraceReservoirSize = 0,
@@ -187,15 +207,20 @@ class InstrProfWriter {
     return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage);
   }
 
-  // Internal interface for testing purpose only.
-  void setValueProfDataEndianness(llvm::endianness Endianness);
-  void setOutputSparse(bool Sparse);
   // Compute the overlap b/w this object and Other. Program level result is
   // stored in Overlap and function level result is stored in FuncLevelOverlap.
   void overlapRecord(NamedInstrProfRecord &&Other, OverlapStats &Overlap,
                      OverlapStats &FuncLevelOverlap,
                      const OverlapFuncFilters &FuncFilter);
 
+  // Internal interface for testing purpose only.
+  void setValueProfDataEndianness(llvm::endianness Endianness);
+  void setOutputSparse(bool Sparse);
+  void setProfileVersion(uint32_t Version);
+  void setMinCompatibleReaderProfileVersion(uint32_t Version);
+  void setAppendAdditionalHeaderFields();
+  void resetTestOnlyStatesForHeaderSection();
+
 private:
   void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I,
                  uint64_t Weight, function_ref<void(Error)> Warn);
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index a9af6b2484a7e4..6f519edfc8dbde 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
@@ -165,6 +166,11 @@ static std::string getInstrProfErrString(instrprof_error Err,
   case instrprof_error::counter_value_too_large:
     OS << "excessively large counter value suggests corrupted profile data";
     break;
+  case instrprof_error::unsupported_incompatible_future_version:
+    OS << "unsupported incompatible future version. The profile is likely "
+          "generated from newer released compilers/tools and not parsable by "
+          "current reader.";
+    break;
   }
 
   // If optional error message is not empty, append it to the message.
@@ -1612,25 +1618,37 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
 
   // Read the version.
   H.Version = read(Buffer, sizeof(uint64_t));
-  uint64_t ProfileVersion = GET_VERSION(H.formatVersion());
-
-  if (ProfileVersion > IndexedInstrProf::ProfVersion::CurrentVersion) {
-    if (CurrentVersion < ProfVersion::Version13)
-      return make_error<InstrProfError>(instrprof_error::unsupported_version);
-
-    // FIXME: When the semantic of an existing payload section changes, allowing
-    // profile reader to interpret new profiles using the old code in the
-    // current way is too error-prone.
-    // Should the payload section record its own version and fail the profile
-    // reader to require a re-build of the profile reader in this case?
+  uint64_t ProfileRecordedProfileVersion = GET_VERSION(H.formatVersion());
+
+  if (ProfileRecordedProfileVersion >= ProfVersion::Version13) {
+    // Starting from version 13, profiles should records header's on-disk byte
+    // size and the minimum profile reader version required.
+    H.OnDiskByteSize = read(Buffer, kOnDiskSizeOffset);
+    H.MinimumProfileReaderVersion =
+        read(Buffer, kOnDiskSizeOffset + sizeof(uint64_t));
+  } else {
+    // Prior to version 13, the largest version supported by the reader
+    // (ProfVersion::CurrentVersion) must be greater than or equal to the
+    // profile-recorded version.
+    H.MinimumProfileReaderVersion = ProfileRecordedProfileVersion;
   }
 
-  if (ProfileVersion >= ProfVersion::Version13)
-    H.Size = read(Buffer, kOnDiskSizeOffset);
+  // Stop reading and return error if the largest version supported by the
+  // reader falls behind the minimum reader version required by the profiles.
+  if (IndexedInstrProf::ProfVersion::CurrentVersion <
+      H.MinimumProfileReaderVersion)
+    return make_error<InstrProfError>(
+        instrprof_error::unsupported_incompatible_future_version,
+        formatv("Profile reader should support {0} to parse profile of version "
+                "{1} ",
+                H.MinimumProfileReaderVersion, ProfileRecordedProfileVersion));
 
   // `FieldByteOffset` represents the end byte of the last known header field.
   auto FieldByteOffsetOrErr = H.knownFieldsEndByteOffset();
-  if (FieldByteOffsetOrErr)
+  if (Error E = FieldByteOffsetOrErr.takeError())
+    return E;
+
+  auto FieldByteOffset = FieldByteOffsetOrErr.get();
 
   assert(FieldByteOffset != 0 &&
          "FieldByteOffset specifies the byte offset of the last known field in "
@@ -1638,11 +1656,11 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
 
   // If the version from profile is higher than the currently supported version,
   // read the known defined header fields and discard the rest.
-  if (ProfileVersion > ProfVersion::CurrentVersion)
-    ProfileVersion = ProfVersion::CurrentVersion;
+  if (ProfileRecordedProfileVersion > ProfVersion::CurrentVersion)
+    ProfileRecordedProfileVersion = ProfVersion::CurrentVersion;
 
   // Initialize header fields based on the currently supported version.
-  switch (ProfileVersion) {
+  switch (ProfileRecordedProfileVersion) {
     // When a new field is added in the header add a case statement here to
     // populate it.
     static_assert(
@@ -1651,7 +1669,8 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
         "if not add a case statement to fall through to the latest version.");
   case 13ull:
     // Size field is already read.
-    FieldByteOffset -= sizeof(Header::Size);
+    FieldByteOffset -= sizeof(Header::OnDiskByteSize);
+    FieldByteOffset -= sizeof(Header::MinimumProfileReaderVersion);
     [[fallthrough]];
   case 12ull:
     FieldByteOffset -= sizeof(Header::VTableNamesOffset);
@@ -1682,21 +1701,23 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
 }
 
 Expected<size_t> Header::knownFieldsEndByteOffset() const {
-  const uint64_t ProfileVersion = GET_VERSION(formatVersion());
-  if (ProfileVersion <= ProfVersion::Version12)
+  // All header fields are known before, so the end byte offset of known fields
+  // is the same as header byte size.
+  const uint64_t ProfileRecordedProfileVersion = GET_VERSION(formatVersion());
+  if (ProfileRecordedProfileVersion <= ProfVersion::Version12)
     return this->size();
 
-  // Starting from version 13, the known field end byte offset is inferred based
-  // on the currently supported version.
+  // Starting from version 13, the known field end byte offset is calculated
+  // based on the currently supported version recorded in the reader.
   switch (IndexedInstrProf::ProfVersion::CurrentVersion) {
     // When a new field is added in the header add a case statement here to
     // populate it.
-    static_assert(
-        IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
-        "Please update the reading code below if a new field has been added, "
-        "if not add a case statement to fall through to the latest version.");
+    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version13,
+                  "If current version bumps, please update the case below to "
+                  "calculate the known field end byte offset.");
   case 13ull:
-    return kOnDiskSizeOffset + sizeof(Header::Size);
+    return kOnDiskSizeOffset + sizeof(Header::OnDiskByteSize) +
+           sizeof(Header::MinimumProfileReaderVersion);
   default:
     break;
   }
@@ -1705,16 +1726,17 @@ Expected<size_t> Header::knownFieldsEndByteOffset() const {
 }
 
 size_t Header::size() const {
-  const uint64_t ProfileVersion = GET_VERSION(formatVersion());
+  const uint64_t ProfileRecordedProfileVersion = GET_VERSION(formatVersion());
   // Starting from version 13, the indexed profile records the byte size of
   // header.
-  if (ProfileVersion >= ProfVersion::Version13) {
-    assert(Size != 0 && "User can call Header::size() only after reading it "
-                        "from readMemoryBuffer");
-    return Size;
+  if (ProfileRecordedProfileVersion >= ProfVersion::Version13) {
+    assert(OnDiskByteSize != 0 &&
+           "User can call Header::size() only after reading it "
+           "from readMemoryBuffer");
+    return OnDiskByteSize;
   }
-  switch (ProfileVersion) {
-    assert(ProfileVersion <= ProfVersion::Version12 &&
+  switch (ProfileRecordedProfileVersion) {
+    assert(ProfileRecordedProfileVersion <= ProfVersion::Version12 &&
            "Do not infer header size field for newer version");
   case 12ull:
     return offsetOf(&Header::VTableNamesOffset) +
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index c636ee624f3af4..aa61d0f8e3decb 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -36,9 +36,6 @@
 
 using namespace llvm;
 
-static cl::opt<bool> WriteFutureVersion("write-future-version",
-                                        cl::init(false));
-
 // A struct to define how the data stream should be patched. For Indexed
 // profiling, only uint64_t data type is needed.
 struct PatchItem {
@@ -196,7 +193,7 @@ InstrProfWriter::InstrProfWriter(
 
 InstrProfWriter::~InstrProfWriter() { delete InfoObj; }
 
-// Internal interface for testing purpose only.
+// Begin: Internal interface for testing purpose only.
 void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) {
   InfoObj->ValueProfDataEndianness = Endianness;
 }
@@ -205,6 +202,25 @@ void InstrProfWriter::setOutputSparse(bool Sparse) {
   this->Sparse = Sparse;
 }
 
+void InstrProfWriter::setProfileVersion(uint32_t Version) {
+  ProfileVersion = std::make_optional(Version);
+}
+
+void InstrProfWriter::setMinCompatibleReaderProfileVersion(uint32_t Version) {
+  MinCompatibleReaderProfileVersion = std::make_optional(Version);
+}
+
+void InstrProfWriter::setAppendAdditionalHeaderFields() {
+  AppendAdditionalHeaderFields = true;
+}
+
+void InstrProfWriter::resetTestOnlyStatesForHeaderSection() {
+  ProfileVersion = std::nullopt;
+  MinCompatibleReaderProfileVersion = std::nullopt;
+  AppendAdditionalHeaderFields = false;
+}
+// End: Internal interface for testing purpose only.
+
 void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
                                 function_ref<void(Error)> Warn) {
   auto Name = I.Name;
@@ -555,6 +571,24 @@ static Error writeMemProf(
               memprof::MaximumSupportedVersion));
 }
 
+uint32_t InstrProfWriter::profileVersion() const {
+  // ProfileVersion is set in tests only.
+  if (this->ProfileVersion)
+    return *(this->ProfileVersion);
+
+  uint64_t CurVersion = IndexedInstrProf::ProfVersion::CurrentVersion;
+
+  return WritePrevVersion ? CurVersion - 1 : CurVersion;
+}
+
+uint64_t InstrProfWriter::minProfileReaderVersion() const {
+  // MinCompatibleReaderProfileVersion is set in tests only.
+  if (this->MinCompatibleReaderProfileVersion)
+    return *(this->MinCompatibleReaderProfileVersion);
+
+  return IndexedInstrProf::ProfVersion::Version13;
+}
+
 Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   using namespace IndexedInstrProf;
   using namespace support;
@@ -578,19 +612,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   // Write the header.
   IndexedInstrProf::Header Header;
   Header.Magic = IndexedInstrProf::Magic;
-  Header.Version = WritePrevVersion
-                       ? IndexedInstrProf::ProfVersion::Version12
-                       : IndexedInstrProf::ProfVersion::CurrentVersion;
-  if (WriteFutureVersion)
-    Header.Version = 14;
-  // The WritePrevVersion handling will either need to be removed or updated
-  // if the version is advanced beyond 13.
-  // Starting from version 13, an indexed profile records the on-disk
-  // byte size of header at a fixed byte offset. Compilers or tools built
-  // starting from this version can read the on-disk byte size (as opposed to
-  // relying on struct definition of Header) to locate the start of payload
-  // sections.
-  // FIXME: Clean up WritePrevVersion later.
+  Header.Version = profileVersion();
+
   assert(IndexedInstrProf::ProfVersion::CurrentVersion ==
          IndexedInstrProf::ProfVersion::Version13);
   if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation))
@@ -647,24 +670,26 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   uint64_t VTableNamesOffset = OS.tell();
   OS.write(0);
 
-  // Record the offset of 'Header::Size' field and reserve the space to allow
-  // back patching later.
+  // Record the offset of 'Header::Size' field.
   const uint64_t OnDiskHeaderSizeOffset = OS.tell();
-  if (!WritePrevVersion)
+
+  if (!WritePrevVersion) {
+    // Reserve the space for `OnDiskByteSize` to allow back patching later.
     OS.write(0);
 
-  // Write a dummy value
-  if (WriteFutureVersion)
+    // Write the minimum compatible version required.
+    OS.write(minProfileReaderVersion());
+  }
+
+  // This is a test-only path to append dummy header fields.
+  // NOTE: please write all other header fields before this one.
+  if (AppendAdditionalHeaderFields)
     OS.write(0);
 
   // Record the OnDiskHeader Size after each header field either gets written or
   // gets its space reserved.
   uint64_t OnDiskHeaderSize = OS.tell() - StartOffset;
 
-  llvm::errs() << "OnDiskHeaderSize is " << OnDiskHeaderSize << "\n";
-  fflush(stdout);
-  fflush(stderr);
-
   // Reserve space to write profile summary data.
   uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
   uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
diff --git a/llvm/test/tools/llvm-profdata/profile-version.test b/llvm/test/tools/llvm-profdata/profile-version.test
index b5405cdec46248..0885a8d5e36cd9 100644
--- a/llvm/test/tools/llvm-profdata/profile-version.test
+++ b/llvm/test/tools/llvm-profdata/profile-version.test
@@ -7,3 +7,4 @@ CHECK: Profile version: 13
 RUN: llvm-profdata merge -o %t.prev.profdata %p/Inputs/basic.proftext --write-prev-version
 RUN: llvm-profdata show --profile-version %t.prev.profdata | FileCheck %s --check-prefix=PREV
 PREV: Profile version: 12
+
diff --git a/llvm/test/tools/llvm-profdata/vtable-value-prof.test b/llvm/test/tools/llvm-profdata/vtable-value-prof.test
index 8c8dfb975ab67c..378c2e11b236ba 100644
--- a/llvm/test/tools/llvm-profdata/vtable-value-prof.test
+++ b/llvm/test/tools/llvm-profdata/vtable-value-prof.test
@@ -1,7 +1,7 @@
 ; RUN: rm -rf %t && mkdir %t && cd %t
 
 ; Generate indexed profiles from text profiles
-RUN: llvm-profdata merge -write-future-version %S/Inputs/vtable-value-prof.proftext -o indexed.profdata
+RUN: llvm-profdata merge %S/Inputs/vtable-value-prof.proftext -o indexed.profdata
 
 ; Show indexed profiles
 RUN: llvm-profdata show --function=main --ic-targets --show-vtables indexed.profdata | FileCheck %s --check-prefix=INDEXED
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 732f8fd792f8de..f810f1fbf33df5 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -171,6 +171,61 @@ TEST_P(MaybeSparseInstrProfTest, get_function_counts) {
   ASSERT_TRUE(ErrorEquals(instrprof_error::unknown_function, std::move(E2)));
 }
 
+// Test that reader could read compatible future versions even if header has
+// unknown new fields.
+TEST_F(InstrProfTest, read_compatible_future_profiles) {
+  Writer.setProfileVersion(std::numeric_limits<uint32_t>::max() - 1);
+  Writer.setMinCompatibleReaderProfileVersion(
+      IndexedInstrProf::ProfVersion::CurrentVersion);
+  Writer.setAppendAdditionalHeaderFields();
+  Writer.addRecord({"foo", 0x1234, {1, 2}}, Err);
+  Writer.addRecord({"foo", 0x1235, {3, 4}}, Err);
+
+  auto Profile = Writer.writeBuffer();
+
+  auto ReaderOrErr = IndexedInstrProfReader::create(std::move(Profile));
+  EXPECT_THAT_ERROR(ReaderOrErr.takeError(), Succeeded());
+
+  // Sanity check the profile counts are correct.
+  auto ProfReader = std::move(ReaderOrErr.get());
+  std::vector<uint64_t> Counts;
+  EXPECT_THAT_ERROR(ProfReader->getFunctionCounts("foo", 0x1234, Counts),
+                    Succeeded());
+  EXPECT_EQ(Counts.size(), 2U);
+  EXPECT_EQ(Counts[0], 1U);
+  EXPECT_EQ(Counts[1], 2U);
+
+  EXPECT_THAT_ERROR(ProfReader->getFunctionCounts("foo", 0x1235, Counts),
+                    Succeeded());
+  ASSERT_EQ(Counts.size(), 2U);
+  ASSERT_EQ(Counts[0], 3U);
+  ASSERT_EQ(Counts[1], 4U);
+
+  // Reset test-only states for other test cases.
+  Writer.resetTestOnlyStatesForHeaderSection();
+}
+
+// Test that reader returns error when reading incompatible profiles.
+TEST_F(InstrProfTest, error_incompatible_profiles) {
+  Writer.setProfileVersion(std::numeric_limits<uint32_t>::max() - 1);
+  Writer.setMinCompatibleReaderProfileVersion(
+      std::numeric_limits<uint32_t>::max() - 1);
+  Writer.setAppendAdditionalHeaderFields();
+  Writer.addRecord({"foo", 0x1234, {1, 2}}, Err);
+  Writer.addRecord({"foo", 0x1235, {3, 4}}, Err);
+
+  auto Profile = Writer.writeBuffer();
+
+  auto ReaderOrErr = IndexedInstrProfReader::create(std::move(Profile));
+
+  ASSERT_TRUE(
+      ErrorEquals(instrprof_error::unsupported_incompatible_future_version,
+                  ReaderOrErr.takeError()));
+
+  // Reset test-only states for other test cases.
+  Writer.resetTestOnlyStatesForHeaderSection();
+}
+
 // Profile data is copied from general.proftext
 TEST_F(InstrProfTest, get_profile_summary) {
   Writer.addRecord({"func1", 0x1234, {97531}}, Err);

>From 22ca3100974cd66531695c39405b637603c214a5 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Sun, 28 Apr 2024 16:10:31 -0700
Subject: [PATCH 7/7] Record the profile byte size and temporal profile byte
 size, such that reader can locate the end of the profile and the end o of the
 temporal profile payload

---
 llvm/include/llvm/ProfileData/InstrProf.h     | 35 ++++++++++++++-----
 .../llvm/ProfileData/InstrProfWriter.h        |  1 -
 llvm/lib/ProfileData/InstrProf.cpp            | 20 +++++++----
 llvm/lib/ProfileData/InstrProfReader.cpp      | 30 +++++++++++-----
 llvm/lib/ProfileData/InstrProfWriter.cpp      | 21 ++++++++++-
 5 files changed, 82 insertions(+), 25 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 5a821108396a24..bb6344141adc3c 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -1148,13 +1148,26 @@ enum ProfVersion {
   Version11 = 11,
   // VTable profiling,
   Version12 = 12,
-  // Two additional header fields to add partial forward compatibility.
-  // Indexed profile reader compares the latest version it can parse with the
-  // minimum version required by (and recorded in) the profile; if the reader
-  // can reasonably interpret the payload, it proceeds to parse known sections
-  // and skip new unknown sections; otherwise it stops reading and throws error
-  // (in this case users should update compilers and/or command line tools to parse profiles
-  // with newer versions).
+  // Add additional header fields to allow partial forward compatibility.
+  // - Allow reader to locate the end byte of the profile header and each
+  //   payload section.
+  //   - Prior to this version, header records the start byte offset of each
+  //   payload; for some payloads, reader knows the end byte offset as it reads
+  //   (or decodes) the payload (i.e., without explicit header fields).
+  //   - With this change, profile header records the byte size of the profile
+  //   header, and the end byte offset of a payload if explicit recording is
+  //   necessary for reader to locate the end of this payload.
+  //
+  // - Profile reader (in compilers and tools) can detect whether it can parse
+  // known payloads with the correct semantic. It makes use of the compatible
+  // profiles and rejects the incompatbile ones (in this case users should
+  // update compilers and/or command line tools to parse profiles with newer
+  // versions)
+  //  - Indexed profile reader compares the latest version it can parse with
+  //    the  minimum version required by (and recorded in) the profile; if the
+  //    reader  can reasonably interpret the payload, it proceeds to parse known
+  //    sections and skip new unknown sections; otherwise it stops reading and
+  //    throws error.
   Version13 = 13,
   // The current version is 13.
   CurrentVersion = INSTR_PROF_INDEX_VERSION
@@ -1186,7 +1199,7 @@ struct Header {
   uint64_t VTableNamesOffset;
 
   // Records the on-disk byte size of the header.
-  uint64_t OnDiskByteSize = 0;
+  uint64_t OnDiskHeaderByteSize = 0;
 
   // Indexed profile writer will records the minimum profile reader version
   // required to parse this profile. If a profile reader's supported version is
@@ -1204,6 +1217,12 @@ struct Header {
   // read process.
   uint64_t MinimumProfileReaderVersion = 0;
 
+  // The byte size of temporal profiles.
+  uint64_t TemporalProfSectionSize = 0;
+
+  // Records the on-disk byte size of the profiles (header and payloads).
+  uint64_t OnDiskProfileByteSize = 0;
+
   // New fields should only be added at the end to ensure that the size
   // computation is correct. The methods below need to be updated to ensure that
   // the new field is read correctly.
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 8c0bd9c5ba5fca..c7cc32d39f5260 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -40,7 +40,6 @@ class InstrProfWriter {
 
 private:
   bool Sparse;
-
   StringMap<ProfilingData> FunctionData;
   /// The maximum length of a single temporal profile trace.
   uint64_t MaxTemporalProfTraceLength;
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index eb1e5bd8d817f5..11e1278f34c1e3 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1625,9 +1625,13 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
   if (ProfileRecordedProfileVersion >= ProfVersion::Version13) {
     // Starting from version 13, profiles should records header's on-disk byte
     // size and the minimum profile reader version required.
-    H.OnDiskByteSize = read(Buffer, kOnDiskSizeOffset);
+    H.OnDiskHeaderByteSize = read(Buffer, kOnDiskSizeOffset);
     H.MinimumProfileReaderVersion =
         read(Buffer, kOnDiskSizeOffset + sizeof(uint64_t));
+    H.TemporalProfSectionSize =
+        read(Buffer, kOnDiskSizeOffset + 2 * sizeof(uint64_t));
+    H.OnDiskProfileByteSize =
+        read(Buffer, kOnDiskSizeOffset + 3 * sizeof(uint64_t));
   } else {
     // Prior to version 13, the largest version supported by the reader
     // (ProfVersion::CurrentVersion) must be greater than or equal to the
@@ -1671,8 +1675,10 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
         "if not add a case statement to fall through to the latest version.");
   case 13ull:
     // Size field is already read.
-    FieldByteOffset -= sizeof(Header::OnDiskByteSize);
+    FieldByteOffset -= sizeof(Header::OnDiskHeaderByteSize);
+    FieldByteOffset -= sizeof(Header::TemporalProfSectionSize);
     FieldByteOffset -= sizeof(Header::MinimumProfileReaderVersion);
+    FieldByteOffset -= sizeof(Header::OnDiskProfileByteSize);
     [[fallthrough]];
   case 12ull:
     FieldByteOffset -= sizeof(Header::VTableNamesOffset);
@@ -1718,8 +1724,10 @@ Expected<size_t> Header::knownFieldsEndByteOffset() const {
                   "If current version bumps, please update the case below to "
                   "calculate the known field end byte offset.");
   case 13ull:
-    return kOnDiskSizeOffset + sizeof(Header::OnDiskByteSize) +
-           sizeof(Header::MinimumProfileReaderVersion);
+    return kOnDiskSizeOffset + sizeof(Header::OnDiskHeaderByteSize) +
+           sizeof(Header::MinimumProfileReaderVersion) +
+           sizeof(Header::OnDiskProfileByteSize) +
+           sizeof(Header::TemporalProfSectionSize);
   default:
     break;
   }
@@ -1732,10 +1740,10 @@ size_t Header::size() const {
   // Starting from version 13, the indexed profile records the byte size of
   // header.
   if (ProfileRecordedProfileVersion >= ProfVersion::Version13) {
-    assert(OnDiskByteSize != 0 &&
+    assert(OnDiskHeaderByteSize != 0 &&
            "User can call Header::size() only after reading it "
            "from readMemoryBuffer");
-    return OnDiskByteSize;
+    return OnDiskHeaderByteSize;
   }
   switch (ProfileRecordedProfileVersion) {
     assert(ProfileRecordedProfileVersion <= ProfVersion::Version12 &&
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index cefb6af12d0021..ce4f6b89c23319 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1330,9 +1330,11 @@ Error IndexedInstrProfReader::readHeader() {
   auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
       Start + HashOffset, Cur, Start, HashType, Header->formatVersion());
 
+  const auto ProfileRecordedVersion = GET_VERSION(Header->formatVersion());
+
   // The MemProfOffset field in the header is only valid when the format
   // version is higher than 8 (when it was introduced).
-  if (GET_VERSION(Header->formatVersion()) >= 8 &&
+  if (ProfileRecordedVersion >= 8 &&
       Header->formatVersion() & VARIANT_MASK_MEMPROF) {
     uint64_t MemProfOffset =
         endian::byte_swap<uint64_t, llvm::endianness::little>(
@@ -1341,9 +1343,19 @@ Error IndexedInstrProfReader::readHeader() {
       return E;
   }
 
-  // BinaryIdOffset field in the header is only valid when the format version
-  // is higher than 9 (when it was introduced).
-  if (GET_VERSION(Header->formatVersion()) >= 9) {
+  auto getProfileBufferEnd = [&]() -> const char * {
+    if (ProfileRecordedVersion <= 12)
+      return DataBuffer->getBufferEnd();
+
+    return DataBuffer->getBufferStart() + Header->OnDiskProfileByteSize;
+  };
+
+  const unsigned char *ProfileBufferEnd =
+      (const unsigned char *)getProfileBufferEnd();
+
+  // BinaryIdOffset field in the header is only valid when the format
+  // version is higher than 9 (when it was introduced).
+  if (ProfileRecordedVersion >= 9) {
     uint64_t BinaryIdOffset =
         endian::byte_swap<uint64_t, llvm::endianness::little>(
             Header->BinaryIdOffset);
@@ -1355,12 +1367,12 @@ Error IndexedInstrProfReader::readHeader() {
       return error(instrprof_error::bad_header);
     // Set the binary ids start.
     BinaryIdsStart = Ptr;
-    if (BinaryIdsStart > (const unsigned char *)DataBuffer->getBufferEnd())
+    if (BinaryIdsStart > ProfileBufferEnd)
       return make_error<InstrProfError>(instrprof_error::malformed,
                                         "corrupted binary ids");
   }
 
-  if (GET_VERSION(Header->formatVersion()) >= 12) {
+  if (ProfileRecordedVersion >= 12) {
     uint64_t VTableNamesOffset =
         endian::byte_swap<uint64_t, llvm::endianness::little>(
             Header->VTableNamesOffset);
@@ -1372,17 +1384,17 @@ Error IndexedInstrProfReader::readHeader() {
     // Writer first writes the length of compressed string, and then the actual
     // content.
     VTableNamePtr = (const char *)Ptr;
-    if (VTableNamePtr > (const char *)DataBuffer->getBufferEnd())
+    if (VTableNamePtr > (const char *)ProfileBufferEnd)
       return make_error<InstrProfError>(instrprof_error::truncated);
   }
 
-  if (GET_VERSION(Header->formatVersion()) >= 10 &&
+  if (ProfileRecordedVersion >= 10 &&
       Header->formatVersion() & VARIANT_MASK_TEMPORAL_PROF) {
     uint64_t TemporalProfTracesOffset =
         endian::byte_swap<uint64_t, llvm::endianness::little>(
             Header->TemporalProfTracesOffset);
     const unsigned char *Ptr = Start + TemporalProfTracesOffset;
-    const auto *PtrEnd = (const unsigned char *)DataBuffer->getBufferEnd();
+    const auto *PtrEnd = ProfileBufferEnd;
     // Expect at least two 64 bit fields: NumTraces, and TraceStreamSize
     if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
       return error(instrprof_error::truncated);
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 422959cabb6407..b9823ab5076453 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -747,15 +747,27 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   uint64_t VTableNamesOffset = OS.tell();
   OS.write(0);
 
-  // Record the offset of 'Header::Size' field.
+  // Record the offset of 'Header::OnDiskHeaderByteSize' field.
   const uint64_t OnDiskHeaderSizeOffset = OS.tell();
 
+  uint64_t OnDiskProfileSizeOffset = 0, TemporalProfSizeOffset = 0;
+
   if (!WritePrevVersion) {
     // Reserve the space for `OnDiskByteSize` to allow back patching later.
     OS.write(0);
 
     // Write the minimum compatible version required.
     OS.write(minProfileReaderVersion());
+
+    // Record the offset of `Header::TemporalProfSizeOffset` field, and reserve
+    // the space for this field to allow back patching.
+    TemporalProfSizeOffset = OS.tell();
+    OS.write(0);
+
+    // Record the offset of `Header::OnDiskProfileByteSize` field, and reserve
+    // the space for this field to allow back patching.
+    OnDiskProfileSizeOffset = OS.tell();
+    OS.write(0);
   }
 
   // This is a test-only path to append dummy header fields.
@@ -863,6 +875,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
     OS.writeByte(0);
 
   uint64_t TemporalProfTracesSectionStart = 0;
+  uint64_t TemporalProfSize = 0;
   if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {
     TemporalProfTracesSectionStart = OS.tell();
     OS.write(TemporalProfTraces.size());
@@ -873,8 +886,12 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
       for (auto &NameRef : Trace.FunctionNameRefs)
         OS.write(NameRef);
     }
+
+    TemporalProfSize = OS.tell() - TemporalProfTracesSectionStart;
   }
 
+  uint64_t OnDiskProfileByteSize = OS.tell() - StartOffset;
+
   // Allocate space for data to be serialized out.
   std::unique_ptr<IndexedInstrProf::Summary> TheSummary =
       IndexedInstrProf::allocSummary(SummarySize);
@@ -908,6 +925,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
         {TemporalProfTracesOffset, &TemporalProfTracesSectionStart, 1},
         {VTableNamesOffset, &VTableNamesSectionStart, 1},
         {OnDiskHeaderSizeOffset, &OnDiskHeaderSize, 1},
+        {TemporalProfSizeOffset, &TemporalProfSize, 1},
+        {OnDiskProfileSizeOffset, &OnDiskProfileByteSize, 1},
         // Patch the summary data.
         {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
          (int)(SummarySize / sizeof(uint64_t))},



More information about the llvm-commits mailing list