[PATCH] D13758: Instr PGO code small restructuring

Fri Oct 16 19:28:16 PDT 2015

David Li <davidxl at google.com> writes:
> davidxl created this revision.
> davidxl added a reviewer: bogner.
> davidxl added a subscriber: llvm-commits.
>
> In this clean up patch, 
> 1) key constant values (version, magic) and data structures related to
> raw and indexed profile format are moved into one centralized place:
> InstrProf.h.
> 2) Utility function such as MD5Hash computation is also moved to the
> common header to allow sharing by other components in the future
> 3) A header data structure is introduced for Indexed format so that
> the reader and writer can always be in sync
> 4) Added some comments to document different places where multiple
> definition of the data structure must be kept in sync (reader/writer,
> runtime, lowering etc).

One minor comment below, then this LGTM.

> No functional change is intended.
>
> http://reviews.llvm.org/D13758
>
> Files:
>   include/llvm/ProfileData/InstrProf.h
>   include/llvm/ProfileData/InstrProfReader.h
>   lib/ProfileData/InstrProfIndexed.h
>   lib/ProfileData/InstrProfReader.cpp
>   lib/ProfileData/InstrProfWriter.cpp
>
> Index: lib/ProfileData/InstrProfWriter.cpp
> ===================================================================
> --- lib/ProfileData/InstrProfWriter.cpp
> +++ lib/ProfileData/InstrProfWriter.cpp
> @@ -13,7 +13,6 @@
>  //===----------------------------------------------------------------------===//
>  
>  #include "llvm/ProfileData/InstrProfWriter.h"
> -#include "InstrProfIndexed.h"
>  #include "llvm/ADT/StringExtras.h"
>  #include "llvm/Support/EndianStream.h"
>  #include "llvm/Support/OnDiskHashTable.h"
> @@ -197,10 +196,17 @@
>    endian::Writer<little> LE(OS);
>  
>    // Write the header.
> -  LE.write<uint64_t>(IndexedInstrProf::Magic);
> -  LE.write<uint64_t>(IndexedInstrProf::Version);
> -  LE.write<uint64_t>(MaxFunctionCount);
> -  LE.write<uint64_t>(static_cast<uint64_t>(IndexedInstrProf::HashType));
> +  IndexedInstrProf::Header Header;
> +  Header.Magic = IndexedInstrProf::Magic;
> +  Header.Version = IndexedInstrProf::Version;
> +  Header.MaxFunctionCount = MaxFunctionCount;
> +  Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
> +  Header.HashOffset = 0;
> +  int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);
> +
> +  // Only write out the first N-1 fields.

This comment isn't helpful - that's apparent from the next line.
Something like "Print everything except the HashOffset" is a little
better.

> +  for (int I = 0; I < N - 1; I++)
> +    LE.write<uint64_t>(reinterpret_cast<uint64_t *>(&Header)[I]);
>  
>    // Save a space to write the hash table start location.
>    uint64_t HashTableStartLoc = OS.tell();
> Index: lib/ProfileData/InstrProfReader.cpp
> ===================================================================
> --- lib/ProfileData/InstrProfReader.cpp
> +++ lib/ProfileData/InstrProfReader.cpp
> @@ -13,7 +13,6 @@
>  //===----------------------------------------------------------------------===//
>  
>  #include "llvm/ProfileData/InstrProfReader.h"
> -#include "InstrProfIndexed.h"
>  #include "llvm/ADT/STLExtras.h"
>  #include <cassert>
>  
> @@ -140,53 +139,24 @@
>  }
>  
>  template <class IntPtrT>
> -static uint64_t getRawMagic();
> -
> -template <>
> -uint64_t getRawMagic<uint64_t>() {
> -  return
> -    uint64_t(255) << 56 |
> -    uint64_t('l') << 48 |
> -    uint64_t('p') << 40 |
> -    uint64_t('r') << 32 |
> -    uint64_t('o') << 24 |
> -    uint64_t('f') << 16 |
> -    uint64_t('r') <<  8 |
> -    uint64_t(129);
> -}
> -
> -template <>
> -uint64_t getRawMagic<uint32_t>() {
> -  return
> -    uint64_t(255) << 56 |
> -    uint64_t('l') << 48 |
> -    uint64_t('p') << 40 |
> -    uint64_t('r') << 32 |
> -    uint64_t('o') << 24 |
> -    uint64_t('f') << 16 |
> -    uint64_t('R') <<  8 |
> -    uint64_t(129);
> -}
> -
> -template <class IntPtrT>
>  bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
>    if (DataBuffer.getBufferSize() < sizeof(uint64_t))
>      return false;
>    uint64_t Magic =
>      *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
> -  return getRawMagic<IntPtrT>() == Magic ||
> -    sys::getSwappedBytes(getRawMagic<IntPtrT>()) == Magic;
> +  return RawInstrProf::getMagic<IntPtrT>() == Magic ||
> +         sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic;
>  }
>  
>  template <class IntPtrT>
>  std::error_code RawInstrProfReader<IntPtrT>::readHeader() {
>    if (!hasFormat(*DataBuffer))
>      return error(instrprof_error::bad_magic);
> -  if (DataBuffer->getBufferSize() < sizeof(RawHeader))
> +  if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))
>      return error(instrprof_error::bad_header);
> -  auto *Header =
> -    reinterpret_cast<const RawHeader *>(DataBuffer->getBufferStart());
> -  ShouldSwapBytes = Header->Magic != getRawMagic<IntPtrT>();
> +  auto *Header = reinterpret_cast<const RawInstrProf::Header *>(
> +      DataBuffer->getBufferStart());
> +  ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();
>    return readHeader(*Header);
>  }
>  
> @@ -202,47 +172,45 @@
>      return instrprof_error::eof;
>    // If there isn't enough space for another header, this is probably just
>    // garbage at the end of the file.
> -  if (CurrentPos + sizeof(RawHeader) > End)
> +  if (CurrentPos + sizeof(RawInstrProf::Header) > End)
>      return instrprof_error::malformed;
>    // The writer ensures each profile is padded to start at an aligned address.
>    if (reinterpret_cast<size_t>(CurrentPos) % alignOf<uint64_t>())
>      return instrprof_error::malformed;
>    // The magic should have the same byte order as in the previous header.
>    uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
> -  if (Magic != swap(getRawMagic<IntPtrT>()))
> +  if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
>      return instrprof_error::bad_magic;
>  
>    // There's another profile to read, so we need to process the header.
> -  auto *Header = reinterpret_cast<const RawHeader *>(CurrentPos);
> +  auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);
>    return readHeader(*Header);
>  }
>  
> -static uint64_t getRawVersion() {
> -  return 1;
> -}
> -
>  template <class IntPtrT>
> -std::error_code
> -RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) {
> -  if (swap(Header.Version) != getRawVersion())
> +std::error_code RawInstrProfReader<IntPtrT>::readHeader(
> +    const RawInstrProf::Header &Header) {
> +  if (swap(Header.Version) != RawInstrProf::Version)
>      return error(instrprof_error::unsupported_version);
>  
>    CountersDelta = swap(Header.CountersDelta);
>    NamesDelta = swap(Header.NamesDelta);
>    auto DataSize = swap(Header.DataSize);
>    auto CountersSize = swap(Header.CountersSize);
>    auto NamesSize = swap(Header.NamesSize);
>  
> -  ptrdiff_t DataOffset = sizeof(RawHeader);
> -  ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize;
> +  ptrdiff_t DataOffset = sizeof(RawInstrProf::Header);
> +  ptrdiff_t CountersOffset =
> +      DataOffset + sizeof(RawInstrProf::ProfileData<IntPtrT>) * DataSize;
>    ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize;
>    size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize;
>  
>    auto *Start = reinterpret_cast<const char *>(&Header);
>    if (Start + ProfileSize > DataBuffer->getBufferEnd())
>      return error(instrprof_error::bad_header);
>  
> -  Data = reinterpret_cast<const ProfileData *>(Start + DataOffset);
> +  Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
> +      Start + DataOffset);
>    DataEnd = Data + DataSize;
>    CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);
>    NamesStart = Start + NamesOffset;
> @@ -421,25 +389,30 @@
>  
>    using namespace support;
>  
> +  auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur);
> +  Cur += sizeof(IndexedInstrProf::Header);
> +
>    // Check the magic number.
> -  uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur);
> +  uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic);
>    if (Magic != IndexedInstrProf::Magic)
>      return error(instrprof_error::bad_magic);
>  
>    // Read the version.
> -  FormatVersion = endian::readNext<uint64_t, little, unaligned>(Cur);
> +  FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version);
>    if (FormatVersion > IndexedInstrProf::Version)
>      return error(instrprof_error::unsupported_version);
>  
>    // Read the maximal function count.
> -  MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur);
> +  MaxFunctionCount =
> +      endian::byte_swap<uint64_t, little>(Header->MaxFunctionCount);
>  
>    // Read the hash type and start offset.
>    IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
> -      endian::readNext<uint64_t, little, unaligned>(Cur));
> +      endian::byte_swap<uint64_t, little>(Header->HashType));
>    if (HashType > IndexedInstrProf::HashT::Last)
>      return error(instrprof_error::unsupported_hash_type);
> -  uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
> +
> +  uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
>  
>    // The rest of the file is an on disk hash table.
>    Index.reset(InstrProfReaderIndex::Create(
> Index: lib/ProfileData/InstrProfIndexed.h
> ===================================================================
> --- lib/ProfileData/InstrProfIndexed.h
> +++ /dev/null
> @@ -1,56 +0,0 @@
> -//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=//
> -//
> -//                     The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// Shared header for the instrumented profile data reader and writer.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#ifndef LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H
> -#define LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H
> -
> -#include "llvm/Support/Endian.h"
> -#include "llvm/Support/ErrorHandling.h"
> -#include "llvm/Support/MD5.h"
> -
> -namespace llvm {
> -
> -namespace IndexedInstrProf {
> -enum class HashT : uint32_t {
> -  MD5,
> -
> -  Last = MD5
> -};
> -
> -static inline uint64_t MD5Hash(StringRef Str) {
> -  MD5 Hash;
> -  Hash.update(Str);
> -  llvm::MD5::MD5Result Result;
> -  Hash.final(Result);
> -  // Return the least significant 8 bytes. Our MD5 implementation returns the
> -  // result in little endian, so we may need to swap bytes.
> -  using namespace llvm::support;
> -  return endian::read<uint64_t, little, unaligned>(Result);
> -}
> -
> -static inline uint64_t ComputeHash(HashT Type, StringRef K) {
> -  switch (Type) {
> -  case HashT::MD5:
> -    return IndexedInstrProf::MD5Hash(K);
> -  }
> -  llvm_unreachable("Unhandled hash type");
> -}
> -
> -const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
> -const uint64_t Version = 3;
> -const HashT HashType = HashT::MD5;
> -}
> -
> -} // end namespace llvm
> -
> -#endif
> Index: include/llvm/ProfileData/InstrProfReader.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProfReader.h
> +++ include/llvm/ProfileData/InstrProfReader.h
> @@ -132,28 +132,12 @@
>  private:
>    /// The profile data file contents.
>    std::unique_ptr<MemoryBuffer> DataBuffer;
> -  struct ProfileData {
> -    const uint32_t NameSize;
> -    const uint32_t NumCounters;
> -    const uint64_t FuncHash;
> -    const IntPtrT NamePtr;
> -    const IntPtrT CounterPtr;
> -  };
> -  struct RawHeader {
> -    const uint64_t Magic;
> -    const uint64_t Version;
> -    const uint64_t DataSize;
> -    const uint64_t CountersSize;
> -    const uint64_t NamesSize;
> -    const uint64_t CountersDelta;
> -    const uint64_t NamesDelta;
> -  };
>  
>    bool ShouldSwapBytes;
>    uint64_t CountersDelta;
>    uint64_t NamesDelta;
> -  const ProfileData *Data;
> -  const ProfileData *DataEnd;
> +  const RawInstrProf::ProfileData<IntPtrT> *Data;
> +  const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
>    const uint64_t *CountersStart;
>    const char *NamesStart;
>    const char *ProfileEnd;
> @@ -170,7 +154,7 @@
>  
>  private:
>    std::error_code readNextHeader(const char *CurrentPos);
> -  std::error_code readHeader(const RawHeader &Header);
> +  std::error_code readHeader(const RawInstrProf::Header &Header);
>    template <class IntT>
>    IntT swap(IntT Int) const {
>      return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
> Index: include/llvm/ProfileData/InstrProf.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProf.h
> +++ include/llvm/ProfileData/InstrProf.h
> @@ -16,8 +16,11 @@
>  #ifndef LLVM_PROFILEDATA_INSTRPROF_H_
>  #define LLVM_PROFILEDATA_INSTRPROF_H_
>  
> +#include "llvm/ADT/StringRef.h"
>  #include "llvm/ADT/StringSet.h"
> +#include "llvm/Support/Endian.h"
>  #include "llvm/Support/ErrorHandling.h"
> +#include "llvm/Support/MD5.h"
>  #include <cstdint>
>  #include <list>
>  #include <system_error>
> @@ -132,6 +135,105 @@
>    }
>  };
>  
> +namespace IndexedInstrProf {
> +enum class HashT : uint32_t {
> +  MD5,
> +
> +  Last = MD5
> +};
> +
> +static inline uint64_t MD5Hash(StringRef Str) {
> +  MD5 Hash;
> +  Hash.update(Str);
> +  llvm::MD5::MD5Result Result;
> +  Hash.final(Result);
> +  // Return the least significant 8 bytes. Our MD5 implementation returns the
> +  // result in little endian, so we may need to swap bytes.
> +  using namespace llvm::support;
> +  return endian::read<uint64_t, little, unaligned>(Result);
> +}
> +
> +static inline uint64_t ComputeHash(HashT Type, StringRef K) {
> +  switch (Type) {
> +    case HashT::MD5:
> +      return IndexedInstrProf::MD5Hash(K);
> +  }
> +  llvm_unreachable("Unhandled hash type");
> +}
> +
> +const uint64_t Magic = 0x8169666f72706cff;  // "\xfflprofi\x81"
> +const uint64_t Version = 3;
> +const HashT HashType = HashT::MD5;
> +
> +struct Header {
> +  uint64_t Magic;
> +  uint64_t Version;
> +  uint64_t MaxFunctionCount;
> +  uint64_t HashType;
> +  uint64_t HashOffset;
> +};
> +
> +}  // end namespace IndexedInstrProf
> +
> +namespace RawInstrProf {
> +
> +const uint64_t Version = 1;
> +
> +// Magic number to detect file format and endianness.
> +// Use 255 at one end, since no UTF-8 file can use that character.  Avoid 0,
> +// so that utilities, like strings, don't grab it as a string.  129 is also
> +// invalid UTF-8, and high enough to be interesting.
> +// Use "lprofr" in the centre to stand for "LLVM Profile Raw", or "lprofR"
> +// for 32-bit platforms.
> +// The magic and version need to be kept in sync with
> +// projects/compiler-rt/lib/profile/InstrProfiling.c
> +
> +template <class IntPtrT>
> +static uint64_t getMagic();
> +template <>
> +uint64_t getMagic<uint64_t>() {
> +  return uint64_t(255) << 56 | uint64_t('l') << 48 | uint64_t('p') << 40 |
> +         uint64_t('r') << 32 | uint64_t('o') << 24 | uint64_t('f') << 16 |
> +         uint64_t('r') << 8 | uint64_t(129);
> +}
> +
> +template <>
> +uint64_t getMagic<uint32_t>() {
> +  return uint64_t(255) << 56 | uint64_t('l') << 48 | uint64_t('p') << 40 |
> +         uint64_t('r') << 32 | uint64_t('o') << 24 | uint64_t('f') << 16 |
> +         uint64_t('R') << 8 | uint64_t(129);
> +}
> +
> +// The definition should match the structure defined in
> +// compiler-rt/lib/profile/InstrProfiling.h.
> +// It should also match the synthesized type in
> +// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters.
> +
> +template <class IntPtrT>
> +struct ProfileData {
> +  const uint32_t NameSize;
> +  const uint32_t NumCounters;
> +  const uint64_t FuncHash;
> +  const IntPtrT NamePtr;
> +  const IntPtrT CounterPtr;
> +};
> +
> +// The definition should match the header referenced in
> +// compiler-rt/lib/profile/InstrProfilingFile.c  and
> +// InstrProfilingBuffer.c.
> +
> +struct Header {
> +  const uint64_t Magic;
> +  const uint64_t Version;
> +  const uint64_t DataSize;
> +  const uint64_t CountersSize;
> +  const uint64_t NamesSize;
> +  const uint64_t CountersDelta;
> +  const uint64_t NamesDelta;
> +};
> +
> +}  // end namespace RawInstrProf
> +
>  } // end namespace llvm
>  
>  namespace std {