[llvm] [nfc][InstrProf]Simplify instrumentation profile reader and writer code (PR #93346)

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Tue May 28 15:13:13 PDT 2024


https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/93346

>From 61b98217b25f1c414615382d1f5c5e73f9214ab9 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Fri, 24 May 2024 11:00:08 -0700
Subject: [PATCH 1/3] [nfc][InstrProf]Simplify instrumentation profile reader
 and writer code

Changes include:
1. In InstrProfReader.cpp, compute ProfileVersion once, rather than
   repeatedly use macro `GET_VERSION(H.Version)`.
2. In InstrProfWriter.cpp, store back-patched header fields in a vector
   and patch once.
3. In InstrProf.cpp, remove offsetOf and use a helper function
   read_and_decrement.

llvm/Support/Endian.h has `readNext` method which increments the
buffer. For `Header::readFromBuffer`, switch statements allow
code sharing by fall-through, and it's not very straightforward to
use 'readNext'.
---
 llvm/lib/ProfileData/InstrProf.cpp       | 45 ++++++++---------
 llvm/lib/ProfileData/InstrProfReader.cpp | 11 ++--
 llvm/lib/ProfileData/InstrProfWriter.cpp | 64 +++++++-----------------
 3 files changed, 45 insertions(+), 75 deletions(-)

diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index f9cd71b37002f..2eaffe84f10c7 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1627,13 +1627,6 @@ void OverlapStats::dump(raw_fd_ostream &OS) const {
 }
 
 namespace IndexedInstrProf {
-// A C++14 compatible version of the offsetof macro.
-template <typename T1, typename T2>
-inline size_t constexpr offsetOf(T1 T2::*Member) {
-  constexpr T2 Object{};
-  return size_t(&(Object.*Member)) - size_t(&Object);
-}
-
 // Read a uint64_t from the specified buffer offset, and swap the bytes in
 // native endianness if necessary.
 static inline uint64_t read(const unsigned char *Buffer, size_t Offset) {
@@ -1642,6 +1635,14 @@ static inline uint64_t read(const unsigned char *Buffer, size_t Offset) {
                                                                      Offset);
 }
 
+// Read a uint64_t from the specified `Buffer`, and points `Buffer` to the
+// address of previous uint64_t.
+static inline uint64_t read_and_decrement(const unsigned char *&Buffer) {
+  uint64_t Val = read(Buffer, 0);
+  Buffer -= sizeof(uint64_t);
+  return Val;
+}
+
 Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
   using namespace support;
   static_assert(std::is_standard_layout_v<Header>,
@@ -1649,16 +1650,17 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
                 "of fields to read.");
   Header H;
 
-  H.Magic = read(Buffer, offsetOf(&Header::Magic));
+  H.Magic = read(Buffer, 0);
   // Check the magic number.
   if (H.Magic != IndexedInstrProf::Magic)
     return make_error<InstrProfError>(instrprof_error::bad_magic);
 
   // Read the version.
-  H.Version = read(Buffer, offsetOf(&Header::Version));
+  H.Version = read(Buffer, 8);
   if (GET_VERSION(H.Version) > IndexedInstrProf::ProfVersion::CurrentVersion)
     return make_error<InstrProfError>(instrprof_error::unsupported_version);
 
+  const unsigned char *NextHeader = Buffer + H.size() - sizeof(uint64_t);
   switch (GET_VERSION(H.Version)) {
     // When a new field is added in the header add a case statement here to
     // populate it.
@@ -1667,23 +1669,22 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
         "Please update the reading code below if a new field has been added, "
         "if not add a case statement to fall through to the latest version.");
   case 12ull:
-    H.VTableNamesOffset = read(Buffer, offsetOf(&Header::VTableNamesOffset));
+    H.VTableNamesOffset = read_and_decrement(NextHeader);
     [[fallthrough]];
   case 11ull:
     [[fallthrough]];
   case 10ull:
-    H.TemporalProfTracesOffset =
-        read(Buffer, offsetOf(&Header::TemporalProfTracesOffset));
+    H.TemporalProfTracesOffset = read_and_decrement(NextHeader);
     [[fallthrough]];
   case 9ull:
-    H.BinaryIdOffset = read(Buffer, offsetOf(&Header::BinaryIdOffset));
+    H.BinaryIdOffset = read_and_decrement(NextHeader);
     [[fallthrough]];
   case 8ull:
-    H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset));
+    H.MemProfOffset = read_and_decrement(NextHeader);
     [[fallthrough]];
   default: // Version7 (when the backwards compatible header was introduced).
-    H.HashType = read(Buffer, offsetOf(&Header::HashType));
-    H.HashOffset = read(Buffer, offsetOf(&Header::HashOffset));
+    H.HashOffset = read_and_decrement(NextHeader);
+    H.HashType = read_and_decrement(NextHeader);
   }
 
   return H;
@@ -1699,19 +1700,17 @@ size_t Header::size() const {
                   "been added to the header, if not add a case statement to "
                   "fall through to the latest version.");
   case 12ull:
-    return offsetOf(&Header::VTableNamesOffset) +
-           sizeof(Header::VTableNamesOffset);
+    return 72;
   case 11ull:
     [[fallthrough]];
   case 10ull:
-    return offsetOf(&Header::TemporalProfTracesOffset) +
-           sizeof(Header::TemporalProfTracesOffset);
+    return 64;
   case 9ull:
-    return offsetOf(&Header::BinaryIdOffset) + sizeof(Header::BinaryIdOffset);
+    return 56;
   case 8ull:
-    return offsetOf(&Header::MemProfOffset) + sizeof(Header::MemProfOffset);
+    return 48;
   default: // Version7 (when the backwards compatible header was introduced).
-    return offsetOf(&Header::HashOffset) + sizeof(Header::HashOffset);
+    return 40;
   }
 }
 
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 836206a4fd86e..af603285343f8 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1325,17 +1325,17 @@ Error IndexedInstrProfReader::readHeader() {
   auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
       Start + Header->HashOffset, Cur, Start, HashType, Header->Version);
 
+  const uint64_t ProfileVersion = GET_VERSION(Header->Version);
   // The MemProfOffset field in the header is only valid when the format
   // version is higher than 8 (when it was introduced).
-  if (GET_VERSION(Header->Version) >= 8 &&
-      Header->Version & VARIANT_MASK_MEMPROF) {
+  if (ProfileVersion >= 8 && Header->Version & VARIANT_MASK_MEMPROF) {
     if (Error E = MemProfReader.deserialize(Start, Header->MemProfOffset))
       return E;
   }
 
   // BinaryIdOffset field in the header is only valid when the format version
   // is higher than 9 (when it was introduced).
-  if (GET_VERSION(Header->Version) >= 9) {
+  if (ProfileVersion >= 9) {
     const unsigned char *Ptr = Start + Header->BinaryIdOffset;
     // Read binary ids size.
     BinaryIdsSize =
@@ -1349,7 +1349,7 @@ Error IndexedInstrProfReader::readHeader() {
                                         "corrupted binary ids");
   }
 
-  if (GET_VERSION(Header->Version) >= 12) {
+  if (ProfileVersion >= 12) {
     const unsigned char *Ptr = Start + Header->VTableNamesOffset;
 
     CompressedVTableNamesLen =
@@ -1362,8 +1362,7 @@ Error IndexedInstrProfReader::readHeader() {
       return make_error<InstrProfError>(instrprof_error::truncated);
   }
 
-  if (GET_VERSION(Header->Version) >= 10 &&
-      Header->Version & VARIANT_MASK_TEMPORAL_PROF) {
+  if (ProfileVersion >= 10 && Header->Version & VARIANT_MASK_TEMPORAL_PROF) {
     const unsigned char *Ptr = Start + Header->TemporalProfTracesOffset;
     const auto *PtrEnd = (const unsigned char *)DataBuffer->getBufferEnd();
     // Expect at least two 64 bit fields: NumTraces, and TraceStreamSize
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index b67a9700b680a..f72df98444007 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -841,52 +841,24 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   }
   InfoObj->CSSummaryBuilder = nullptr;
 
-  const size_t MemProfOffset = BackPatchStartOffset + sizeof(uint64_t);
-  const size_t BinaryIdOffset = MemProfOffset + sizeof(uint64_t);
-  const size_t TemporalProfTracesOffset = BinaryIdOffset + sizeof(uint64_t);
-  const size_t VTableNamesOffset = TemporalProfTracesOffset + sizeof(uint64_t);
-  if (!WritePrevVersion) {
-    // Now do the final patch:
-    PatchItem PatchItems[] = {
-        // Patch the Header.HashOffset field.
-        {BackPatchStartOffset, &HashTableStart, 1},
-        // Patch the Header.MemProfOffset (=0 for profiles without MemProf
-        // data).
-        {MemProfOffset, &MemProfSectionStart, 1},
-        // Patch the Header.BinaryIdSectionOffset.
-        {BinaryIdOffset, &BinaryIdSectionStart, 1},
-        // Patch the Header.TemporalProfTracesOffset (=0 for profiles without
-        // traces).
-        {TemporalProfTracesOffset, &TemporalProfTracesSectionStart, 1},
-        {VTableNamesOffset, &VTableNamesSectionStart, 1},
-        // Patch the summary data.
-        {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
-         (int)(SummarySize / sizeof(uint64_t))},
-        {CSSummaryOffset, reinterpret_cast<uint64_t *>(TheCSSummary.get()),
-         (int)CSSummarySize}};
-
-    OS.patch(PatchItems);
-  } else {
-    // Now do the final patch:
-    PatchItem PatchItems[] = {
-        // Patch the Header.HashOffset field.
-        {BackPatchStartOffset, &HashTableStart, 1},
-        // Patch the Header.MemProfOffset (=0 for profiles without MemProf
-        // data).
-        {MemProfOffset, &MemProfSectionStart, 1},
-        // Patch the Header.BinaryIdSectionOffset.
-        {BinaryIdOffset, &BinaryIdSectionStart, 1},
-        // Patch the Header.TemporalProfTracesOffset (=0 for profiles without
-        // traces).
-        {TemporalProfTracesOffset, &TemporalProfTracesSectionStart, 1},
-        // Patch the summary data.
-        {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
-         (int)(SummarySize / sizeof(uint64_t))},
-        {CSSummaryOffset, reinterpret_cast<uint64_t *>(TheCSSummary.get()),
-         (int)CSSummarySize}};
-
-    OS.patch(PatchItems);
-  }
+  SmallVector<uint64_t, 8> HeaderOffsets = {HashTableStart, MemProfSectionStart,
+                                            BinaryIdSectionStart,
+                                            TemporalProfTracesSectionStart};
+
+  if (!WritePrevVersion)
+    HeaderOffsets.push_back(VTableNamesSectionStart);
+
+  // Now do the final patch.
+  PatchItem PatchItems[] = {
+      // Patch the Header fields
+      {BackPatchStartOffset, HeaderOffsets.data(), HeaderOffsets.size()},
+      // Patch the summary data.
+      {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
+       (int)(SummarySize / sizeof(uint64_t))},
+      {CSSummaryOffset, reinterpret_cast<uint64_t *>(TheCSSummary.get()),
+       (int)CSSummarySize}};
+
+  OS.patch(PatchItems);
 
   for (const auto &I : FunctionData)
     for (const auto &F : I.getValue())

>From bb60ded148dcacf55f313fb3a28a2293d0a67381 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Fri, 24 May 2024 13:53:04 -0700
Subject: [PATCH 2/3] cast size_t to int to fix compile error in
 https://buildkite.com/llvm-project/github-pull-requests/builds/67098#018fac5e-811d-46e4-8ba4-62c8b57f28e3

---
 llvm/lib/ProfileData/InstrProfWriter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index f72df98444007..93eb0043bf32e 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -851,7 +851,7 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   // Now do the final patch.
   PatchItem PatchItems[] = {
       // Patch the Header fields
-      {BackPatchStartOffset, HeaderOffsets.data(), HeaderOffsets.size()},
+      {BackPatchStartOffset, HeaderOffsets.data(), (int)HeaderOffsets.size()},
       // Patch the summary data.
       {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
        (int)(SummarySize / sizeof(uint64_t))},

>From 3852e32dc04094d1481ba42f875a26bbd5efaa71 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 28 May 2024 15:12:54 -0700
Subject: [PATCH 3/3] Read the buffer in the forward direction and remove
 'offset_of'

---
 llvm/lib/ProfileData/InstrProf.cpp | 45 +++++++++++++++---------------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 2eaffe84f10c7..f9cd71b37002f 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1627,6 +1627,13 @@ void OverlapStats::dump(raw_fd_ostream &OS) const {
 }
 
 namespace IndexedInstrProf {
+// A C++14 compatible version of the offsetof macro.
+template <typename T1, typename T2>
+inline size_t constexpr offsetOf(T1 T2::*Member) {
+  constexpr T2 Object{};
+  return size_t(&(Object.*Member)) - size_t(&Object);
+}
+
 // Read a uint64_t from the specified buffer offset, and swap the bytes in
 // native endianness if necessary.
 static inline uint64_t read(const unsigned char *Buffer, size_t Offset) {
@@ -1635,14 +1642,6 @@ static inline uint64_t read(const unsigned char *Buffer, size_t Offset) {
                                                                      Offset);
 }
 
-// Read a uint64_t from the specified `Buffer`, and points `Buffer` to the
-// address of previous uint64_t.
-static inline uint64_t read_and_decrement(const unsigned char *&Buffer) {
-  uint64_t Val = read(Buffer, 0);
-  Buffer -= sizeof(uint64_t);
-  return Val;
-}
-
 Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
   using namespace support;
   static_assert(std::is_standard_layout_v<Header>,
@@ -1650,17 +1649,16 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
                 "of fields to read.");
   Header H;
 
-  H.Magic = read(Buffer, 0);
+  H.Magic = read(Buffer, offsetOf(&Header::Magic));
   // Check the magic number.
   if (H.Magic != IndexedInstrProf::Magic)
     return make_error<InstrProfError>(instrprof_error::bad_magic);
 
   // Read the version.
-  H.Version = read(Buffer, 8);
+  H.Version = read(Buffer, offsetOf(&Header::Version));
   if (GET_VERSION(H.Version) > IndexedInstrProf::ProfVersion::CurrentVersion)
     return make_error<InstrProfError>(instrprof_error::unsupported_version);
 
-  const unsigned char *NextHeader = Buffer + H.size() - sizeof(uint64_t);
   switch (GET_VERSION(H.Version)) {
     // When a new field is added in the header add a case statement here to
     // populate it.
@@ -1669,22 +1667,23 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
         "Please update the reading code below if a new field has been added, "
         "if not add a case statement to fall through to the latest version.");
   case 12ull:
-    H.VTableNamesOffset = read_and_decrement(NextHeader);
+    H.VTableNamesOffset = read(Buffer, offsetOf(&Header::VTableNamesOffset));
     [[fallthrough]];
   case 11ull:
     [[fallthrough]];
   case 10ull:
-    H.TemporalProfTracesOffset = read_and_decrement(NextHeader);
+    H.TemporalProfTracesOffset =
+        read(Buffer, offsetOf(&Header::TemporalProfTracesOffset));
     [[fallthrough]];
   case 9ull:
-    H.BinaryIdOffset = read_and_decrement(NextHeader);
+    H.BinaryIdOffset = read(Buffer, offsetOf(&Header::BinaryIdOffset));
     [[fallthrough]];
   case 8ull:
-    H.MemProfOffset = read_and_decrement(NextHeader);
+    H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset));
     [[fallthrough]];
   default: // Version7 (when the backwards compatible header was introduced).
-    H.HashOffset = read_and_decrement(NextHeader);
-    H.HashType = read_and_decrement(NextHeader);
+    H.HashType = read(Buffer, offsetOf(&Header::HashType));
+    H.HashOffset = read(Buffer, offsetOf(&Header::HashOffset));
   }
 
   return H;
@@ -1700,17 +1699,19 @@ size_t Header::size() const {
                   "been added to the header, if not add a case statement to "
                   "fall through to the latest version.");
   case 12ull:
-    return 72;
+    return offsetOf(&Header::VTableNamesOffset) +
+           sizeof(Header::VTableNamesOffset);
   case 11ull:
     [[fallthrough]];
   case 10ull:
-    return 64;
+    return offsetOf(&Header::TemporalProfTracesOffset) +
+           sizeof(Header::TemporalProfTracesOffset);
   case 9ull:
-    return 56;
+    return offsetOf(&Header::BinaryIdOffset) + sizeof(Header::BinaryIdOffset);
   case 8ull:
-    return 48;
+    return offsetOf(&Header::MemProfOffset) + sizeof(Header::MemProfOffset);
   default: // Version7 (when the backwards compatible header was introduced).
-    return 40;
+    return offsetOf(&Header::HashOffset) + sizeof(Header::HashOffset);
   }
 }
 



More information about the llvm-commits mailing list