[compiler-rt] 1ae7d83 - [profile] Add binary ids into indexed profiles

Gulfem Savrun Yeniceri via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 29 10:48:49 PST 2022


Author: Gulfem Savrun Yeniceri
Date: 2022-12-29T18:46:56Z
New Revision: 1ae7d83803e45f6053ec6a606f259653846926b8

URL: https://github.com/llvm/llvm-project/commit/1ae7d83803e45f6053ec6a606f259653846926b8
DIFF: https://github.com/llvm/llvm-project/commit/1ae7d83803e45f6053ec6a606f259653846926b8.diff

LOG: [profile] Add binary ids into indexed profiles

This patch adds support for including binary ids in an indexed profile.
It adds a new field into the header that points to the offset of the
binary id section. The binary id section consists of a size of the
section, and a list of binary ids (if they are present) that consist
of two parts: length and data.

This patch guarantees that indexed profile is backwards compatible
after adding binary ids.

Differential Revision: https://reviews.llvm.org/D135929

Added: 
    

Modified: 
    compiler-rt/include/profile/InstrProfData.inc
    compiler-rt/test/profile/Linux/binary-id.c
    compiler-rt/test/profile/Linux/counter_promo_for.c
    compiler-rt/test/profile/Linux/counter_promo_nest.c
    compiler-rt/test/profile/Linux/counter_promo_while.c
    compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
    llvm/include/llvm/ProfileData/InstrProf.h
    llvm/include/llvm/ProfileData/InstrProfData.inc
    llvm/include/llvm/ProfileData/InstrProfReader.h
    llvm/include/llvm/ProfileData/InstrProfWriter.h
    llvm/lib/ProfileData/InstrProf.cpp
    llvm/lib/ProfileData/InstrProfReader.cpp
    llvm/lib/ProfileData/InstrProfWriter.cpp
    llvm/tools/llvm-profdata/llvm-profdata.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index 282620d8b5dc0..05419bf01f52f 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -650,7 +650,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 /* Raw profile format version (start from 1). */
 #define INSTR_PROF_RAW_VERSION 8
 /* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 8
+#define INSTR_PROF_INDEX_VERSION 9
 /* Coverage mapping format version (start from 0). */
 #define INSTR_PROF_COVMAP_VERSION 5
 

diff  --git a/compiler-rt/test/profile/Linux/binary-id.c b/compiler-rt/test/profile/Linux/binary-id.c
index 04f54b57a0757..61b8ed9268496 100644
--- a/compiler-rt/test/profile/Linux/binary-id.c
+++ b/compiler-rt/test/profile/Linux/binary-id.c
@@ -1,13 +1,13 @@
 // REQUIRES: linux
 // RUN: %clang_profgen -Wl,--build-id=none -O2 -o %t %s
 // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
-// RUN: llvm-profdata show --binary-ids  %t.profraw > %t.out
+// RUN: llvm-profdata show --binary-ids %t.profraw > %t.out
 // RUN: FileCheck %s --check-prefix=NO-BINARY-ID < %t.out
 // RUN: llvm-profdata merge -o %t.profdata %t.profraw
 
 // RUN: %clang_profgen -Wl,--build-id -O2 -o %t %s
 // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
-// RUN: llvm-profdata show --binary-ids  %t.profraw > %t.profraw.out
+// RUN: llvm-profdata show --binary-ids %t.profraw > %t.profraw.out
 // RUN: FileCheck %s --check-prefix=BINARY-ID-RAW-PROF < %t.profraw.out
 
 // RUN: rm -rf %t.profdir
@@ -17,6 +17,10 @@
 // RUN: llvm-profdata show --binary-ids  %t.profdir/default_*.profraw > %t.profraw.out
 // RUN: FileCheck %s --check-prefix=BINARY-ID-MERGE-PROF < %t.profraw.out
 
+// RUN: llvm-profdata merge -o %t.profdata %t.profraw %t.profraw
+// RUN: llvm-profdata show --binary-ids %t.profdata > %t.profdata.out
+// RUN: FileCheck %s --check-prefix=BINARY-ID-INDEXED-PROF < %t.profraw.out
+
 void foo() {
 }
 
@@ -48,3 +52,10 @@ int main() {
 // BINARY-ID-MERGE-PROF-NEXT: Maximum internal block count: 0
 // BINARY-ID-MERGE-PROF-NEXT: Binary IDs:
 // BINARY-ID-MERGE-PROF-NEXT: {{[0-9a-f]+}}
+
+// BINARY-ID-INDEXED-PROF: Instrumentation level: Front-end
+// BINARY-ID-INDEXED-PROF-NEXT: Total functions: 3
+// BINARY-ID-INDEXED-PROF-NEXT: Maximum function count: 3
+// BINARY-ID-INDEXED-PROF-NEXT: Maximum internal block count: 0
+// BINARY-ID-INDEXED-PROF-NEXT: Binary IDs:
+// BINARY-ID-INDEXED-PROF-NEXT: {{[0-9a-f]+}}

diff  --git a/compiler-rt/test/profile/Linux/counter_promo_for.c b/compiler-rt/test/profile/Linux/counter_promo_for.c
index 8699605855588..ffb107a7027b5 100644
--- a/compiler-rt/test/profile/Linux/counter_promo_for.c
+++ b/compiler-rt/test/profile/Linux/counter_promo_for.c
@@ -12,7 +12,7 @@
 // RUN: %run %t.nopromo.gen
 // RUN: llvm-profdata merge -o %t.nopromo.profdata %t.nopromo.prof/
 // RUN: llvm-profdata show --counts --all-functions %t.nopromo.profdata  > %t.nopromo.dump
-// RUN: 
diff  %t.promo.profdata %t.nopromo.profdata
+// RUN: 
diff  <(llvm-profdata show %t.promo.profdata) <(llvm-profdata show %t.nopromo.profdata)
 
 int g;
 __attribute__((noinline)) void bar(int i) { g += i; }

diff  --git a/compiler-rt/test/profile/Linux/counter_promo_nest.c b/compiler-rt/test/profile/Linux/counter_promo_nest.c
index ebd52dda7f48b..ac32d16d706ba 100644
--- a/compiler-rt/test/profile/Linux/counter_promo_nest.c
+++ b/compiler-rt/test/profile/Linux/counter_promo_nest.c
@@ -10,7 +10,7 @@
 // RUN: %run %t.nopromo.gen
 // RUN: llvm-profdata merge -o %t.nopromo.profdata %t.nopromo.prof/
 // RUN: llvm-profdata show --counts --all-functions %t.nopromo.profdata  > %t.nopromo.dump
-// RUN: 
diff  %t.promo.profdata %t.nopromo.profdata
+// RUN: 
diff  <(llvm-profdata show %t.promo.profdata) <(llvm-profdata show %t.nopromo.profdata)
 int g;
 __attribute__((noinline)) void bar() {
  g++;

diff  --git a/compiler-rt/test/profile/Linux/counter_promo_while.c b/compiler-rt/test/profile/Linux/counter_promo_while.c
index 8a186f49f9f8e..3fb6561777095 100644
--- a/compiler-rt/test/profile/Linux/counter_promo_while.c
+++ b/compiler-rt/test/profile/Linux/counter_promo_while.c
@@ -12,7 +12,7 @@
 // RUN: %run %t.nopromo.gen
 // RUN: llvm-profdata merge -o %t.nopromo.profdata %t.nopromo.prof/
 // RUN: llvm-profdata show --counts --all-functions %t.nopromo.profdata  > %t.nopromo.dump
-// RUN: 
diff  %t.promo.profdata %t.nopromo.profdata
+// RUN: 
diff  <(llvm-profdata show %t.promo.profdata) <(llvm-profdata show %t.nopromo.profdata)
 int g;
 __attribute__((noinline)) void bar(int i) { g += i; }
 __attribute__((noinline)) void foo(int n, int N) {

diff  --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
index bd50234f7ad10..e40d1bee692a5 100644
--- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
+++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
@@ -7,13 +7,13 @@
 // RUN: env LLVM_PROFILE_FILE=%t.d4.proflite %run %t.d4
 // RUN: llvm-profdata merge -o %t.d4.profdata --debug-info=%t.d4 %t.d4.proflite
 
-// RUN: 
diff  %t.normal.profdata %t.d4.profdata
+// RUN: 
diff  <(llvm-profdata show %t.normal.profdata) <(llvm-profdata show %t.d4.profdata)
 
 // RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
 // RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t
 // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite
 
-// RUN: 
diff  %t.normal.profdata %t.profdata
+// RUN: 
diff  <(llvm-profdata show %t.normal.profdata) <(llvm-profdata show %t.profdata)
 
 // RUN: %clang_pgogen -o %t.cov -g -mllvm --debug-info-correlate -mllvm -pgo-function-entry-coverage -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
 // RUN: env LLVM_PROFILE_FILE=%t.cov.proflite %run %t.cov
@@ -23,4 +23,4 @@
 // RUN: env LLVM_PROFILE_FILE=%t.cov.profraw %run %t.cov.normal
 // RUN: llvm-profdata merge -o %t.cov.normal.profdata %t.cov.profraw
 
-// RUN: 
diff  %t.cov.normal.profdata %t.cov.profdata
+// RUN: 
diff  <(llvm-profdata show %t.cov.normal.profdata) <(llvm-profdata show %t.cov.profdata)

diff  --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 0d788a40f5339..eba7885e83b7a 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -1050,7 +1050,9 @@ enum ProfVersion {
   Version7 = 7,
   // An additional (optional) memory profile type is added.
   Version8 = 8,
-  // The current version is 8.
+  // Binary ids are added.
+  Version9 = 9,
+  // The current version is 9.
   CurrentVersion = INSTR_PROF_INDEX_VERSION
 };
 const uint64_t Version = ProfVersion::CurrentVersion;
@@ -1068,6 +1070,7 @@ struct Header {
   uint64_t HashType;
   uint64_t HashOffset;
   uint64_t MemProfOffset;
+  uint64_t BinaryIdOffset;
   // New fields should only be added at the end to ensure that the size
   // computation is correct. The methods below need to be updated to ensure that
   // the new field is read correctly.

diff  --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 282620d8b5dc0..05419bf01f52f 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -650,7 +650,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 /* Raw profile format version (start from 1). */
 #define INSTR_PROF_RAW_VERSION 8
 /* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 8
+#define INSTR_PROF_INDEX_VERSION 9
 /* Coverage mapping format version (start from 0). */
 #define INSTR_PROF_COVMAP_VERSION 5
 

diff  --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index ad7ced931c6ea..8beba947c8630 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -17,12 +17,14 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/ProfileSummary.h"
+#include "llvm/Object/BuildID.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/InstrProfCorrelator.h"
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/OnDiskHashTable.h"
 #include "llvm/Support/SwapByteOrder.h"
@@ -96,7 +98,12 @@ class InstrProfReader {
   /// Read a single record.
   virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
 
-  /// Print binary ids on stream OS.
+  /// Read a list of binary ids.
+  virtual Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) {
+    return success();
+  }
+
+  /// Print binary ids.
   virtual Error printBinaryIds(raw_ostream &OS) { return success(); };
 
   /// Iterator over profile data.
@@ -295,7 +302,9 @@ class RawInstrProfReader : public InstrProfReader {
   uint32_t ValueKindLast;
   uint32_t CurValueDataSize;
 
-  uint64_t BinaryIdsSize;
+  /// Total size of binary ids.
+  uint64_t BinaryIdsSize{0};
+  /// Start address of binary id length and data pairs.
   const uint8_t *BinaryIdsStart;
 
 public:
@@ -310,6 +319,7 @@ class RawInstrProfReader : public InstrProfReader {
   static bool hasFormat(const MemoryBuffer &DataBuffer);
   Error readHeader() override;
   Error readNextRecord(NamedInstrProfRecord &Record) override;
+  Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
   Error printBinaryIds(raw_ostream &OS) override;
 
   uint64_t getVersion() const override { return Version; }
@@ -596,6 +606,10 @@ class IndexedInstrProfReader : public InstrProfReader {
   std::unique_ptr<MemProfRecordHashTable> MemProfRecordTable;
   /// MemProf frame profile data on-disk indexed via frame id.
   std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
+  /// Total size of binary ids.
+  uint64_t BinaryIdsSize{0};
+  /// Start address of binary id length and data pairs.
+  const uint8_t *BinaryIdsStart;
 
   // Index to the current record in the record array.
   unsigned RecordIndex;
@@ -706,6 +720,9 @@ class IndexedInstrProfReader : public InstrProfReader {
       return *Summary;
     }
   }
+
+  Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
+  Error printBinaryIds(raw_ostream &OS) override;
 };
 
 } // end namespace llvm

diff  --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 29e07961a2f43..087f22996657f 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/Object/BuildID.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/Support/Endian.h"
@@ -50,6 +51,9 @@ class InstrProfWriter {
   // inline.
   llvm::MapVector<memprof::FrameId, memprof::Frame> MemProfFrameData;
 
+  // List of binary ids.
+  std::vector<llvm::object::BuildID> BinaryIds;
+
   // An enum describing the attributes of the profile.
   InstrProfKind ProfileKind = InstrProfKind::Unknown;
   // Use raw pointer here for the incomplete type object.
@@ -79,6 +83,9 @@ class InstrProfWriter {
   bool addMemProfFrame(const memprof::FrameId, const memprof::Frame &F,
                        function_ref<void(Error)> Warn);
 
+  // Add a binary id to the binary ids list.
+  void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);
+
   /// Merge existing function counts from the given writer.
   void mergeRecordsFromWriter(InstrProfWriter &&IPW,
                               function_ref<void(Error)> Warn);

diff  --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index fec25a84423f7..7f415dfd0a6f7 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -1372,9 +1372,12 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
     // When a new field is added in the header add a case statement here to
     // populate it.
     static_assert(
-        IndexedInstrProf::ProfVersion::CurrentVersion == Version8,
+        IndexedInstrProf::ProfVersion::CurrentVersion == Version9,
         "Please update the reading code below if a new field has been added, "
         "if not add a case statement to fall through to the latest version.");
+  case 9ull:
+    H.BinaryIdOffset = read(Buffer, offsetOf(&Header::BinaryIdOffset));
+    [[fallthrough]];
   case 8ull:
     H.MemProfOffset = read(Buffer, offsetOf(&Header::MemProfOffset));
     [[fallthrough]];
@@ -1391,10 +1394,12 @@ size_t Header::size() const {
     // When a new field is added to the header add a case statement here to
     // compute the size as offset of the new field + size of the new field. This
     // relies on the field being added to the end of the list.
-    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version8,
+    static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version9,
                   "Please update the size computation below if a new field has "
                   "been added to the header, if not add a case statement to "
                   "fall through to the latest version.");
+  case 9ull:
+    return offsetOf(&Header::BinaryIdOffset) + sizeof(Header::BinaryIdOffset);
   case 8ull:
     return offsetOf(&Header::MemProfOffset) + sizeof(Header::MemProfOffset);
   default: // Version7 (when the backwards compatible header was introduced).

diff  --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 26d23aefd71dd..4dfc3bab932db 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -75,6 +75,91 @@ static Error initializeReader(InstrProfReader &Reader) {
   return Reader.readHeader();
 }
 
+/// Read a list of binary ids from a profile that consist of
+/// a. uint64_t binary id length
+/// b. uint8_t  binary id data
+/// c. uint8_t  padding (if necessary)
+/// This function is shared between raw and indexed profiles.
+/// Raw profiles are in host-endian format, and indexed profiles are in
+/// little-endian format. So, this function takes an argument indicating the
+/// associated endian format to read the binary ids correctly.
+static Error
+readBinaryIdsInternal(const MemoryBuffer &DataBuffer,
+                      const uint64_t BinaryIdsSize,
+                      const uint8_t *BinaryIdsStart,
+                      std::vector<llvm::object::BuildID> &BinaryIds,
+                      const llvm::support::endianness Endian) {
+  using namespace support;
+
+  if (BinaryIdsSize == 0)
+    return Error::success();
+
+  const uint8_t *BI = BinaryIdsStart;
+  const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize;
+  const uint8_t *End =
+      reinterpret_cast<const uint8_t *>(DataBuffer.getBufferEnd());
+
+  while (BI < BIEnd) {
+    size_t Remaining = BIEnd - BI;
+    // There should be enough left to read the binary id length.
+    if (Remaining < sizeof(uint64_t))
+      return make_error<InstrProfError>(
+          instrprof_error::malformed,
+          "not enough data to read binary id length");
+
+    uint64_t BILen = 0;
+    if (Endian == little)
+      BILen = endian::readNext<uint64_t, little, unaligned>(BI);
+    else
+      BILen = endian::readNext<uint64_t, big, unaligned>(BI);
+
+    if (BILen == 0)
+      return make_error<InstrProfError>(instrprof_error::malformed,
+                                        "binary id length is 0");
+
+    Remaining = BIEnd - BI;
+    // There should be enough left to read the binary id data.
+    if (Remaining < alignToPowerOf2(BILen, sizeof(uint64_t)))
+      return make_error<InstrProfError>(
+          instrprof_error::malformed, "not enough data to read binary id data");
+
+    // Add binary id to the binary ids list.
+    BinaryIds.push_back(object::BuildID(BI, BI + BILen));
+
+    // Increment by binary id data length, which aligned to the size of uint64.
+    BI += alignToPowerOf2(BILen, sizeof(uint64_t));
+    if (BI > End)
+      return make_error<InstrProfError>(
+          instrprof_error::malformed,
+          "binary id section is greater than buffer size");
+  }
+
+  return Error::success();
+}
+
+static Error printBinaryIdsInternal(raw_ostream &OS,
+                                    const MemoryBuffer &DataBuffer,
+                                    uint64_t BinaryIdsSize,
+                                    const uint8_t *BinaryIdsStart,
+                                    llvm::support::endianness Endian) {
+  if (BinaryIdsSize == 0)
+    return Error::success();
+
+  std::vector<llvm::object::BuildID> BinaryIds;
+  if (Error E = readBinaryIdsInternal(DataBuffer, BinaryIdsSize, BinaryIdsStart,
+                                      BinaryIds, Endian))
+    return E;
+
+  OS << "Binary IDs: \n";
+  for (auto BI : BinaryIds) {
+    for (uint64_t I = 0; I < BI.size(); I++)
+      OS << format("%02x", BI[I]);
+    OS << "\n";
+  }
+
+  return Error::success();
+}
+
 Expected<std::unique_ptr<InstrProfReader>>
 InstrProfReader::create(const Twine &Path,
                         const InstrProfCorrelator *Correlator) {
@@ -573,54 +658,18 @@ Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record)
   return success();
 }
 
-static size_t RoundUp(size_t size, size_t align) {
-  return (size + align - 1) & ~(align - 1);
+template <class IntPtrT>
+Error RawInstrProfReader<IntPtrT>::readBinaryIds(
+    std::vector<llvm::object::BuildID> &BinaryIds) {
+  return readBinaryIdsInternal(*DataBuffer, BinaryIdsSize, BinaryIdsStart,
+                               BinaryIds,
+                               llvm::support::endian::system_endianness());
 }
 
 template <class IntPtrT>
 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
-  if (BinaryIdsSize == 0)
-    return success();
-
-  OS << "Binary IDs: \n";
-  const uint8_t *BI = BinaryIdsStart;
-  const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize;
-  while (BI < BIEnd) {
-    size_t Remaining = BIEnd - BI;
-
-    // There should be enough left to read the binary ID size field.
-    if (Remaining < sizeof(uint64_t))
-      return make_error<InstrProfError>(
-          instrprof_error::malformed,
-          "not enough data to read binary id length");
-
-    uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI));
-
-    // There should be enough left to read the binary ID size field, and the
-    // binary ID.
-    if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen)
-      return make_error<InstrProfError>(
-          instrprof_error::malformed, "not enough data to read binary id data");
-
-    // Increment by binary id length data type size.
-    BI += sizeof(BinaryIdLen);
-    if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
-      return make_error<InstrProfError>(
-          instrprof_error::malformed,
-          "binary id that is read is bigger than buffer size");
-
-    for (uint64_t I = 0; I < BinaryIdLen; I++)
-      OS << format("%02x", BI[I]);
-    OS << "\n";
-
-    // Increment by binary id data length, rounded to the next 8 bytes. This
-    // accounts for the zero-padding after each build ID.
-    BI += RoundUp(BinaryIdLen, sizeof(uint64_t));
-    if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
-      return make_error<InstrProfError>(instrprof_error::malformed);
-  }
-
-  return success();
+  return printBinaryIdsInternal(OS, *DataBuffer, BinaryIdsSize, BinaryIdsStart,
+                                llvm::support::endian::system_endianness());
 }
 
 namespace llvm {
@@ -948,9 +997,9 @@ Error IndexedInstrProfReader::readHeader() {
   Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur,
                     /* UseCS */ false);
   if (Header->formatVersion() & VARIANT_MASK_CSIR_PROF)
-    Cur = readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur,
-                      /* UseCS */ true);
-
+    Cur =
+        readSummary((IndexedInstrProf::ProfVersion)Header->formatVersion(), Cur,
+                    /* UseCS */ true);
   // Read the hash type and start offset.
   IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
       endian::byte_swap<uint64_t, little>(Header->HashType));
@@ -963,8 +1012,8 @@ Error IndexedInstrProfReader::readHeader() {
   auto IndexPtr = std::make_unique<InstrProfReaderIndex<OnDiskHashTableImplV3>>(
       Start + HashOffset, Cur, Start, HashType, Header->formatVersion());
 
-  // The MemProfOffset field in the header is only valid when the format version
-  // is higher than 8 (when it was introduced).
+  // The MemProfOffset field in the header is only valid when the format
+  // version is higher than 8 (when it was introduced).
   if (GET_VERSION(Header->formatVersion()) >= 8 &&
       Header->formatVersion() & VARIANT_MASK_MEMPROF) {
     uint64_t MemProfOffset =
@@ -974,7 +1023,8 @@ Error IndexedInstrProfReader::readHeader() {
     // The value returned from RecordTableGenerator.Emit.
     const uint64_t RecordTableOffset =
         support::endian::readNext<uint64_t, little, unaligned>(Ptr);
-    // The offset in the stream right before invoking FrameTableGenerator.Emit.
+    // The offset in the stream right before invoking
+    // FrameTableGenerator.Emit.
     const uint64_t FramePayloadOffset =
         support::endian::readNext<uint64_t, little, unaligned>(Ptr);
     // The value returned from FrameTableGenerator.Emit.
@@ -1000,11 +1050,28 @@ Error IndexedInstrProfReader::readHeader() {
         /*Base=*/Start, memprof::FrameLookupTrait()));
   }
 
+  // BinaryIdOffset field in the header is only valid when the format version
+  // is higher than 9 (when it was introduced).
+  if (GET_VERSION(Header->formatVersion()) >= 9) {
+    uint64_t BinaryIdOffset =
+        endian::byte_swap<uint64_t, little>(Header->BinaryIdOffset);
+    const unsigned char *Ptr = Start + BinaryIdOffset;
+    // Read binary ids size.
+    BinaryIdsSize = support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+    if (BinaryIdsSize % sizeof(uint64_t))
+      return error(instrprof_error::bad_header);
+    // Set the binary ids start.
+    BinaryIdsStart = Ptr;
+    if (BinaryIdsStart > (const unsigned char *)DataBuffer->getBufferEnd())
+      return make_error<InstrProfError>(instrprof_error::malformed,
+                                        "corrupted binary ids");
+  }
+
   // Load the remapping table now if requested.
   if (RemappingBuffer) {
-    Remapper = std::make_unique<
-        InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
-        std::move(RemappingBuffer), *IndexPtr);
+    Remapper =
+        std::make_unique<InstrProfReaderItaniumRemapper<OnDiskHashTableImplV3>>(
+            std::move(RemappingBuffer), *IndexPtr);
     if (Error E = Remapper->populateRemappings())
       return E;
   } else {
@@ -1136,6 +1203,17 @@ Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
   return success();
 }
 
+Error IndexedInstrProfReader::readBinaryIds(
+    std::vector<llvm::object::BuildID> &BinaryIds) {
+  return readBinaryIdsInternal(*DataBuffer, BinaryIdsSize, BinaryIdsStart,
+                               BinaryIds, llvm::support::little);
+}
+
+Error IndexedInstrProfReader::printBinaryIds(raw_ostream &OS) {
+  return printBinaryIdsInternal(OS, *DataBuffer, BinaryIdsSize, BinaryIdsStart,
+                                llvm::support::little);
+}
+
 void InstrProfReader::accumulateCounts(CountSumOrPercent &Sum, bool IsCS) {
   uint64_t NumFuncs = 0;
   for (const auto &Func : *this) {

diff  --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index bbea275ab445e..af3c27ebac76d 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -54,6 +54,7 @@ class ProfOStream {
 
   uint64_t tell() { return OS.tell(); }
   void write(uint64_t V) { LE.write<uint64_t>(V); }
+  void writeByte(uint8_t V) { LE.write<uint8_t>(V); }
 
   // \c patch can only be called when all data is written and flushed.
   // For raw_string_ostream, the patch is done on the target string
@@ -280,12 +281,20 @@ bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id,
   return true;
 }
 
+void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
+  llvm::append_range(BinaryIds, BIs);
+}
+
 void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
                                              function_ref<void(Error)> Warn) {
   for (auto &I : IPW.FunctionData)
     for (auto &Func : I.getValue())
       addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn);
 
+  BinaryIds.reserve(BinaryIds.size() + IPW.BinaryIds.size());
+  for (auto &I : IPW.BinaryIds)
+    addBinaryIds(I);
+
   MemProfFrameData.reserve(IPW.MemProfFrameData.size());
   for (auto &I : IPW.MemProfFrameData) {
     // If we weren't able to add the frame mappings then it doesn't make sense
@@ -330,6 +339,7 @@ static void setSummary(IndexedInstrProf::Summary *TheSummary,
 
 Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   using namespace IndexedInstrProf;
+  using namespace support;
 
   OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator;
 
@@ -365,11 +375,13 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
   Header.HashOffset = 0;
   Header.MemProfOffset = 0;
+  Header.BinaryIdOffset = 0;
   int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);
 
-  // Only write out all the fields except 'HashOffset' and 'MemProfOffset'. We
-  // need to remember the offset of these fields to allow back patching later.
-  for (int I = 0; I < N - 2; I++)
+  // Only write out all the fields except 'HashOffset', 'MemProfOffset' and
+  // 'BinaryIdOffset'. We need to remember the offset of these fields to allow
+  // back patching later.
+  for (int I = 0; I < N - 3; I++)
     OS.write(reinterpret_cast<uint64_t *>(&Header)[I]);
 
   // Save the location of Header.HashOffset field in \c OS.
@@ -384,6 +396,12 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   // profile contains memory profile information.
   OS.write(0);
 
+  // Save the location of binary ids section.
+  uint64_t BinaryIdSectionOffset = OS.tell();
+  // Reserve space for the BinaryIdOffset field to be patched later if this
+  // profile contains binary ids.
+  OS.write(0);
+
   // Reserve space to write profile summary data.
   uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
   uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
@@ -460,6 +478,43 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
     OS.patch(PatchItems, 3);
   }
 
+  // BinaryIdSection has two parts:
+  // 1. uint64_t BinaryIdsSectionSize
+  // 2. list of binary ids that consist of:
+  //    a. uint64_t BinaryIdLength
+  //    b. uint8_t  BinaryIdData
+  //    c. uint8_t  Padding (if necessary)
+  uint64_t BinaryIdSectionStart = OS.tell();
+  // Calculate size of binary section.
+  uint64_t BinaryIdsSectionSize = 0;
+
+  // Remove duplicate binary ids.
+  llvm::sort(BinaryIds);
+  BinaryIds.erase(std::unique(BinaryIds.begin(), BinaryIds.end()),
+                  BinaryIds.end());
+
+  for (auto BI : BinaryIds) {
+    // Increment by binary id length data type size.
+    BinaryIdsSectionSize += sizeof(uint64_t);
+    // Increment by binary id data length, aligned to 8 bytes.
+    BinaryIdsSectionSize += alignToPowerOf2(BI.size(), sizeof(uint64_t));
+  }
+  // Write binary ids section size.
+  OS.write(BinaryIdsSectionSize);
+
+  for (auto BI : BinaryIds) {
+    uint64_t BILen = BI.size();
+    // Write binary id length.
+    OS.write(BILen);
+    // Write binary id data.
+    for (unsigned K = 0; K < BILen; K++)
+      OS.writeByte(BI[K]);
+    // Write padding if necessary.
+    uint64_t PaddingSize = alignToPowerOf2(BILen, sizeof(uint64_t)) - BILen;
+    for (unsigned K = 0; K < PaddingSize; K++)
+      OS.writeByte(0);
+  }
+
   // Allocate space for data to be serialized out.
   std::unique_ptr<IndexedInstrProf::Summary> TheSummary =
       IndexedInstrProf::allocSummary(SummarySize);
@@ -482,8 +537,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   PatchItem PatchItems[] = {
       // Patch the Header.HashOffset field.
       {HashTableStartFieldOffset, &HashTableStart, 1},
-      // Patch the Header.MemProfOffset (=0 for profiles without MemProf data).
+      // Patch the Header.MemProfOffset (=0 for profiles without MemProf
+      // data).
       {MemProfSectionOffset, &MemProfSectionStart, 1},
+      // Patch the Header.BinaryIdSectionOffset.
+      {BinaryIdSectionOffset, &BinaryIdSectionStart, 1},
       // Patch the summary data.
       {SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
        (int)(SummarySize / sizeof(uint64_t))},

diff  --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 76b745b13448e..90c9e560d47c7 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -338,9 +338,16 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
                              FuncName, firstTime);
     });
   }
-  if (Reader->hasError())
+
+  if (Reader->hasError()) {
     if (Error E = Reader->getError())
       WC->Errors.emplace_back(std::move(E), Filename);
+  }
+
+  std::vector<llvm::object::BuildID> BinaryIds;
+  if (Error E = Reader->readBinaryIds(BinaryIds))
+    WC->Errors.emplace_back(std::move(E), Filename);
+  WC->Writer.addBinaryIds(BinaryIds);
 }
 
 /// Merge the \p Src writer context into \p Dst.


        


More information about the llvm-commits mailing list