[compiler-rt] b9f547e - [llvm][profile] Add padding after binary IDs

Leonard Chan via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 28 11:51:02 PDT 2021


Author: Leonard Chan
Date: 2021-09-28T11:50:50-07:00
New Revision: b9f547e8e51182d32f1912f97a3e53f4899ea6be

URL: https://github.com/llvm/llvm-project/commit/b9f547e8e51182d32f1912f97a3e53f4899ea6be
DIFF: https://github.com/llvm/llvm-project/commit/b9f547e8e51182d32f1912f97a3e53f4899ea6be.diff

LOG: [llvm][profile] Add padding after binary IDs

Some tests with binary IDs would fail with error: no profile can be merged.
This is because raw profiles could have unaligned headers when emitting binary
IDs. This means padding should be emitted after binary IDs are emitted to
ensure everything else is aligned. This patch adds padding after each binary ID
to ensure the next binary ID size is 8-byte aligned. This also adds extra
checks to ensure we aren't reading corrupted data when printing binary IDs.

Differential Revision: https://reviews.llvm.org/D110365

Added: 
    compiler-rt/test/profile/Linux/binary-id-padding.c
    llvm/test/tools/llvm-profdata/binary-ids-padding.test
    llvm/test/tools/llvm-profdata/insufficient-binary-ids-size.test
    llvm/test/tools/llvm-profdata/large-binary-id-size.test
    llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test

Modified: 
    compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
    llvm/lib/ProfileData/InstrProfReader.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
index 5d47083b8bfe7..e61f90b2cef96 100644
--- a/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformLinux.c
@@ -95,10 +95,13 @@ static size_t RoundUp(size_t size, size_t align) {
  * have a fixed length.
  */
 static int WriteOneBinaryId(ProfDataWriter *Writer, uint64_t BinaryIdLen,
-                            const uint8_t *BinaryIdData) {
+                            const uint8_t *BinaryIdData,
+                            uint64_t BinaryIdPadding) {
   ProfDataIOVec BinaryIdIOVec[] = {
       {&BinaryIdLen, sizeof(uint64_t), 1, 0},
-      {BinaryIdData, sizeof(uint8_t), BinaryIdLen, 0}};
+      {BinaryIdData, sizeof(uint8_t), BinaryIdLen, 0},
+      {NULL, sizeof(uint8_t), BinaryIdPadding, 1},
+  };
   if (Writer->Write(Writer, BinaryIdIOVec,
                     sizeof(BinaryIdIOVec) / sizeof(*BinaryIdIOVec)))
     return -1;
@@ -130,11 +133,12 @@ static int WriteBinaryIdForNote(ProfDataWriter *Writer,
     uint64_t BinaryIdLen = Note->n_descsz;
     const uint8_t *BinaryIdData =
         (const uint8_t *)(NoteName + RoundUp(Note->n_namesz, 4));
-    if (Writer != NULL &&
-        WriteOneBinaryId(Writer, BinaryIdLen, BinaryIdData) == -1)
+    uint8_t BinaryIdPadding = __llvm_profile_get_num_padding_bytes(BinaryIdLen);
+    if (Writer != NULL && WriteOneBinaryId(Writer, BinaryIdLen, BinaryIdData,
+                                           BinaryIdPadding) == -1)
       return -1;
 
-    BinaryIdSize = sizeof(BinaryIdLen) + BinaryIdLen;
+    BinaryIdSize = sizeof(BinaryIdLen) + BinaryIdLen + BinaryIdPadding;
   }
 
   return BinaryIdSize;

diff  --git a/compiler-rt/test/profile/Linux/binary-id-padding.c b/compiler-rt/test/profile/Linux/binary-id-padding.c
new file mode 100644
index 0000000000000..e0ec634dc5f0d
--- /dev/null
+++ b/compiler-rt/test/profile/Linux/binary-id-padding.c
@@ -0,0 +1,82 @@
+// Set these requirements to ensure that we have an 8-byte binary ID.
+// REQUIRES: linux
+//
+// This will generate a 20-byte build ID, which requires 4-byte padding.
+// RUN: %clang_profgen -Wl,--build-id=sha1 -o %t %s
+// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
+// RUN: %run %t %t.profraw
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+enum ValueKind {
+#define VALUE_PROF_KIND(Enumerator, Value, Descr) Enumerator = Value,
+#include "profile/InstrProfData.inc"
+};
+
+typedef struct __llvm_profile_header {
+#define INSTR_PROF_RAW_HEADER(Type, Name, Initializer) Type Name;
+#include "profile/InstrProfData.inc"
+} __llvm_profile_header;
+
+typedef void *IntPtrT;
+typedef struct __llvm_profile_data {
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) Type Name;
+#include "profile/InstrProfData.inc"
+} __llvm_profile_data;
+
+void bail(const char* str) {
+  fprintf(stderr, "%s %s\n", str, strerror(errno));
+  exit(1);
+}
+
+void func() {}
+
+int main(int argc, char** argv) {
+  if (argc == 2) {
+    int fd = open(argv[1], O_RDONLY);
+    if (fd == -1)
+      bail("open");
+
+    struct stat st;
+    if (stat(argv[1], &st))
+      bail("stat");
+    uint64_t FileSize = st.st_size;
+
+    char* Buf = (char *)mmap(NULL, FileSize, PROT_READ, MAP_SHARED, fd, 0);
+    if (Buf == MAP_FAILED)
+      bail("mmap");
+
+    __llvm_profile_header *Header = (__llvm_profile_header *)Buf;
+    if (Header->BinaryIdsSize != 32)
+      bail("Invalid binary ID size");
+
+    char *BinaryIdsStart = Buf + sizeof(__llvm_profile_header);
+
+    uint64_t BinaryIdSize = *((uint64_t *)BinaryIdsStart);
+    if (BinaryIdSize != 20)
+      bail("Expected a binary ID of size 20");
+
+    // Skip the size and the binary ID itself to check padding.
+    BinaryIdsStart += 8 + 20;
+    if (*((uint32_t *)BinaryIdsStart))
+      bail("Found non-zero binary ID padding");
+
+    if (munmap(Buf, FileSize))
+      bail("munmap");
+
+    if (close(fd))
+      bail("close");
+  } else {
+    func();
+  }
+  return 0;
+}

diff  --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index d7b8844ff6fb1..e9a2139051e8d 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -367,6 +367,9 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
     return error(instrprof_error::unsupported_version);
 
   BinaryIdsSize = swap(Header.BinaryIdsSize);
+  if (BinaryIdsSize % sizeof(uint64_t))
+    return error(instrprof_error::bad_header);
+
   CountersDelta = swap(Header.CountersDelta);
   NamesDelta = swap(Header.NamesDelta);
   auto DataSize = swap(Header.DataSize);
@@ -402,6 +405,10 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
   NamesStart = Start + NamesOffset;
   ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
 
+  const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd();
+  if (BinaryIdsStart + BinaryIdsSize > BufferEnd)
+    return error(instrprof_error::bad_header);
+
   std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
   if (Error E = createSymtab(*NewSymtab.get()))
     return E;
@@ -520,6 +527,10 @@ Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record)
   return success();
 }
 
+static size_t RoundUp(size_t size, size_t align) {
+  return (size + align - 1) & ~(align - 1);
+}
+
 template <class IntPtrT>
 Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
   if (BinaryIdsSize == 0)
@@ -527,8 +538,21 @@ Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
 
   OS << "Binary IDs: \n";
   const uint8_t *BI = BinaryIdsStart;
-  while (BI < BinaryIdsStart + BinaryIdsSize) {
+  const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize;
+  while (BI < BIEnd) {
+    size_t Remaining = BIEnd - BI;
+
+    // There should be enough left to read the binary ID size field.
+    if (Remaining < sizeof(uint64_t))
+      return make_error<InstrProfError>(instrprof_error::malformed);
+
     uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI));
+
+    // There should be enough left to read the binary ID size field, and the
+    // binary ID.
+    if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen)
+      return make_error<InstrProfError>(instrprof_error::malformed);
+
     // Increment by binary id length data type size.
     BI += sizeof(BinaryIdLen);
     if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
@@ -538,8 +562,9 @@ Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
       OS << format("%02x", BI[I]);
     OS << "\n";
 
-    // Increment by binary id data length.
-    BI += BinaryIdLen;
+    // Increment by binary id data length, rounded to the next 8 bytes. This
+    // accounts for the zero-padding after each build ID.
+    BI += RoundUp(BinaryIdLen, sizeof(uint64_t));
     if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
       return make_error<InstrProfError>(instrprof_error::malformed);
   }

diff  --git a/llvm/test/tools/llvm-profdata/binary-ids-padding.test b/llvm/test/tools/llvm-profdata/binary-ids-padding.test
new file mode 100644
index 0000000000000..d5ba34db934ab
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/binary-ids-padding.test
@@ -0,0 +1,72 @@
+// Header
+//
+// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
+// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
+// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, NamesSize,  NamesSize)
+// INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
+// INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
+// INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
+
+RUN: printf '\201rforpl\377' > %t.profraw
+RUN: printf '\7\0\0\0\0\0\0\0' >> %t.profraw
+// There will be 2 20-byte binary IDs, so the total Binary IDs size will be 64 bytes.
+//   2 * 8  binary ID sizes
+// + 2 * 20 binary IDs (of size 20)
+// + 2 * 4  binary ID paddings
+// --------
+// = 64     bytes
+RUN: printf '\100\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\2\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\3\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\20\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
+RUN: printf '\0\0\4\0\2\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+
+// Binary IDs - There are only two in this case that are 20 bytes.
+RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\1\2\3\4\5\6\7' >> %t.profraw
+RUN: printf '\0\1\2\3\4\5\6\7' >> %t.profraw
+RUN: printf '\0\1\2\3\0\0\0\0' >> %t.profraw
+RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\1\1\1\1\1\1\1\1' >> %t.profraw
+RUN: printf '\2\2\2\2\2\2\2\2' >> %t.profraw
+RUN: printf '\3\3\3\3\0\0\0\0' >> %t.profraw
+
+// Data Section
+//
+// struct ProfData {
+// #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
+//    Type Name;
+// #include "llvm/ProfileData/InstrProfData.inc"
+// };
+
+RUN: printf '\254\275\030\333\114\302\370\134' >> %t.profraw
+RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\4\0\1\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\1\0\0\0\0\0\0\0' >> %t.profraw
+
+RUN: printf '\067\265\035\031\112\165\023\344' >> %t.profraw
+RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\xd8\xff\3\0\1\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\02\0\0\0\0\0\0\0' >> %t.profraw
+
+RUN: printf '\023\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\067\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\101\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\7\0foo\1bar\0\0\0\0\0\0\0' >> %t.profraw
+
+// RUN: llvm-profdata show --binary-ids  %t.profraw | FileCheck %s
+// CHECK: Instrumentation level: Front-end
+// CHECK: Binary IDs:
+// CHECK-NEXT: 0001020304050607000102030405060700010203
+// CHECK-NEXT: 0101010101010101020202020202020203030303

diff  --git a/llvm/test/tools/llvm-profdata/insufficient-binary-ids-size.test b/llvm/test/tools/llvm-profdata/insufficient-binary-ids-size.test
new file mode 100644
index 0000000000000..7d5b84eaaff82
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/insufficient-binary-ids-size.test
@@ -0,0 +1,20 @@
+RUN: printf '\201rforpl\377' > %t.profraw
+RUN: printf '\7\0\0\0\0\0\0\0' >> %t.profraw
+// We should fail on this because the data buffer (profraw file) is not long
+// enough to hold this binary IDs size. NOTE that this (combined with the 8-byte
+// alignment requirement for binary IDs size) will ensure we can at least read one
+// 8-byte size if the binary IDs are provided.
+RUN: printf '\8\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+
+RUN: printf '\0\0\0\0\0\0\0' >> %t.profraw
+
+// RUN: not llvm-profdata show --binary-ids  %t.profraw 2>&1 | FileCheck %s
+// CHECK: invalid instrumentation profile data (file header is corrupt)

diff  --git a/llvm/test/tools/llvm-profdata/large-binary-id-size.test b/llvm/test/tools/llvm-profdata/large-binary-id-size.test
new file mode 100644
index 0000000000000..41ece3275c1ac
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/large-binary-id-size.test
@@ -0,0 +1,20 @@
+RUN: printf '\201rforpl\377' > %t.profraw
+RUN: printf '\7\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\40\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+
+// Check for a corrupted size being too large past the end of the file.
+RUN: printf '\7\7\7\7\7\7\7\7' >> %t.profraw
+RUN: printf '\0\1\2\3\4\5\6\7' >> %t.profraw
+RUN: printf '\0\1\2\3\4\5\6\7' >> %t.profraw
+RUN: printf '\0\1\2\3\0\0\0\0' >> %t.profraw
+
+// RUN: not llvm-profdata show --binary-ids  %t.profraw 2>&1 | FileCheck %s
+// CHECK: malformed instrumentation profile data

diff  --git a/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test b/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test
new file mode 100644
index 0000000000000..87acde8db3c6f
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/misaligned-binary-ids-size.test
@@ -0,0 +1,25 @@
+RUN: printf '\201rforpl\377' > %t.profraw
+RUN: printf '\7\0\0\0\0\0\0\0' >> %t.profraw
+// We should fail on this because the binary IDs is not a multiple of 8 bytes.
+RUN: printf '\77\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\0\0\0\0\0\0\0' >> %t.profraw
+
+// Binary IDs - There are only two in this case that are 20 bytes.
+RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\0\1\2\3\4\5\6\7' >> %t.profraw
+RUN: printf '\0\1\2\3\4\5\6\7' >> %t.profraw
+RUN: printf '\0\1\2\3\0\0\0\0' >> %t.profraw
+RUN: printf '\24\0\0\0\0\0\0\0' >> %t.profraw
+RUN: printf '\1\1\1\1\1\1\1\1' >> %t.profraw
+RUN: printf '\2\2\2\2\2\2\2\2' >> %t.profraw
+RUN: printf '\3\3\3\3\0\0\0\0' >> %t.profraw
+
+// RUN: not llvm-profdata show --binary-ids  %t.profraw 2>&1 | FileCheck %s
+// CHECK: invalid instrumentation profile data (file header is corrupt)


        


More information about the llvm-commits mailing list