<div dir="ltr">Fixed the comment and committed the patch.<div><br></div><div>David</div></div><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Oct 16, 2015 at 7:28 PM, Justin Bogner <span dir="ltr"><<a href="mailto:mail@justinbogner.com" target="_blank">mail@justinbogner.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">David Li <<a href="mailto:davidxl@google.com">davidxl@google.com</a>> writes:<br>

> davidxl created this revision.<br>

> davidxl added a reviewer: bogner.<br>

> davidxl added a subscriber: llvm-commits.<br>

><br>

> In this clean up patch,<br>

> 1) key constant values (version, magic) and data structures related to<br>

> raw and indexed profile format are moved into one centralized place:<br>

> InstrProf.h.<br>

> 2) Utility function such as MD5Hash computation is also moved to the<br>

> common header to allow sharing by other components in the future<br>

> 3) A header data structure is introduced for Indexed format so that<br>

> the reader and writer can always be in sync<br>

> 4) Added some comments to document different places where multiple<br>

> definition of the data structure must be kept in sync (reader/writer,<br>

> runtime, lowering etc).<br>

<br>

</span>One minor comment below, then this LGTM.<br>

<span class=""><br>

> No functional change is intended.<br>

><br>

> <a href="http://reviews.llvm.org/D13758" rel="noreferrer" target="_blank">http://reviews.llvm.org/D13758</a><br>

><br>

> Files:<br>

>   include/llvm/ProfileData/InstrProf.h<br>

>   include/llvm/ProfileData/InstrProfReader.h<br>

>   lib/ProfileData/InstrProfIndexed.h<br>

>   lib/ProfileData/InstrProfReader.cpp<br>

>   lib/ProfileData/InstrProfWriter.cpp<br>

><br>

</span>> Index: lib/ProfileData/InstrProfWriter.cpp<br>

> ===================================================================<br>

> --- lib/ProfileData/InstrProfWriter.cpp<br>

> +++ lib/ProfileData/InstrProfWriter.cpp<br>

> @@ -13,7 +13,6 @@<br>

>  //===----------------------------------------------------------------------===//<br>

><br>

>  #include "llvm/ProfileData/InstrProfWriter.h"<br>

> -#include "InstrProfIndexed.h"<br>

>  #include "llvm/ADT/StringExtras.h"<br>

>  #include "llvm/Support/EndianStream.h"<br>

>  #include "llvm/Support/OnDiskHashTable.h"<br>

> @@ -197,10 +196,17 @@<br>

>    endian::Writer<little> LE(OS);<br>

><br>

>    // Write the header.<br>

> -  LE.write<uint64_t>(IndexedInstrProf::Magic);<br>

> -  LE.write<uint64_t>(IndexedInstrProf::Version);<br>

> -  LE.write<uint64_t>(MaxFunctionCount);<br>

> -  LE.write<uint64_t>(static_cast<uint64_t>(IndexedInstrProf::HashType));<br>

> +  IndexedInstrProf::Header Header;<br>

> +  Header.Magic = IndexedInstrProf::Magic;<br>

> +  Header.Version = IndexedInstrProf::Version;<br>

> +  Header.MaxFunctionCount = MaxFunctionCount;<br>

> +  Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);<br>

> +  Header.HashOffset = 0;<br>

> +  int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);<br>

> +<br>

> +  // Only write out the first N-1 fields.<br>

<br>

This comment isn't helpful - that's apparent from the next line.<br>

Something like "Print everything except the HashOffset" is a little<br>

better.<br>

<br>

> +  for (int I = 0; I < N - 1; I++)<br>

> +    LE.write<uint64_t>(reinterpret_cast<uint64_t *>(&Header)[I]);<br>

><br>

>    // Save a space to write the hash table start location.<br>

>    uint64_t HashTableStartLoc = OS.tell();<br>

> Index: lib/ProfileData/InstrProfReader.cpp<br>

> ===================================================================<br>

> --- lib/ProfileData/InstrProfReader.cpp<br>

> +++ lib/ProfileData/InstrProfReader.cpp<br>

> @@ -13,7 +13,6 @@<br>

>  //===----------------------------------------------------------------------===//<br>

><br>

>  #include "llvm/ProfileData/InstrProfReader.h"<br>

> -#include "InstrProfIndexed.h"<br>

>  #include "llvm/ADT/STLExtras.h"<br>

>  #include <cassert><br>

><br>

> @@ -140,53 +139,24 @@<br>

>  }<br>

><br>

>  template <class IntPtrT><br>

> -static uint64_t getRawMagic();<br>

> -<br>

> -template <><br>

> -uint64_t getRawMagic<uint64_t>() {<br>

> -  return<br>

> -    uint64_t(255) << 56 |<br>

> -    uint64_t('l') << 48 |<br>

> -    uint64_t('p') << 40 |<br>

> -    uint64_t('r') << 32 |<br>

> -    uint64_t('o') << 24 |<br>

> -    uint64_t('f') << 16 |<br>

> -    uint64_t('r') <<  8 |<br>

> -    uint64_t(129);<br>

> -}<br>

> -<br>

> -template <><br>

> -uint64_t getRawMagic<uint32_t>() {<br>

> -  return<br>

> -    uint64_t(255) << 56 |<br>

> -    uint64_t('l') << 48 |<br>

> -    uint64_t('p') << 40 |<br>

> -    uint64_t('r') << 32 |<br>

> -    uint64_t('o') << 24 |<br>

> -    uint64_t('f') << 16 |<br>

> -    uint64_t('R') <<  8 |<br>

> -    uint64_t(129);<br>

> -}<br>

> -<br>

> -template <class IntPtrT><br>

>  bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {<br>

>    if (DataBuffer.getBufferSize() < sizeof(uint64_t))<br>

>      return false;<br>

>    uint64_t Magic =<br>

>      *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());<br>

> -  return getRawMagic<IntPtrT>() == Magic ||<br>

> -    sys::getSwappedBytes(getRawMagic<IntPtrT>()) == Magic;<br>

> +  return RawInstrProf::getMagic<IntPtrT>() == Magic ||<br>

> +         sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic;<br>

>  }<br>

><br>

>  template <class IntPtrT><br>

>  std::error_code RawInstrProfReader<IntPtrT>::readHeader() {<br>

>    if (!hasFormat(*DataBuffer))<br>

>      return error(instrprof_error::bad_magic);<br>

> -  if (DataBuffer->getBufferSize() < sizeof(RawHeader))<br>

> +  if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))<br>

>      return error(instrprof_error::bad_header);<br>

> -  auto *Header =<br>

> -    reinterpret_cast<const RawHeader *>(DataBuffer->getBufferStart());<br>

> -  ShouldSwapBytes = Header->Magic != getRawMagic<IntPtrT>();<br>

> +  auto *Header = reinterpret_cast<const RawInstrProf::Header *>(<br>

> +      DataBuffer->getBufferStart());<br>

> +  ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();<br>

>    return readHeader(*Header);<br>

>  }<br>

><br>

> @@ -202,47 +172,45 @@<br>

>      return instrprof_error::eof;<br>

>    // If there isn't enough space for another header, this is probably just<br>

>    // garbage at the end of the file.<br>

> -  if (CurrentPos + sizeof(RawHeader) > End)<br>

> +  if (CurrentPos + sizeof(RawInstrProf::Header) > End)<br>

>      return instrprof_error::malformed;<br>

>    // The writer ensures each profile is padded to start at an aligned address.<br>

>    if (reinterpret_cast<size_t>(CurrentPos) % alignOf<uint64_t>())<br>

>      return instrprof_error::malformed;<br>

>    // The magic should have the same byte order as in the previous header.<br>

>    uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);<br>

> -  if (Magic != swap(getRawMagic<IntPtrT>()))<br>

> +  if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))<br>

>      return instrprof_error::bad_magic;<br>

><br>

>    // There's another profile to read, so we need to process the header.<br>

> -  auto *Header = reinterpret_cast<const RawHeader *>(CurrentPos);<br>

> +  auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);<br>

>    return readHeader(*Header);<br>

>  }<br>

><br>

> -static uint64_t getRawVersion() {<br>

> -  return 1;<br>

> -}<br>

> -<br>

>  template <class IntPtrT><br>

> -std::error_code<br>

> -RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) {<br>

> -  if (swap(Header.Version) != getRawVersion())<br>

> +std::error_code RawInstrProfReader<IntPtrT>::readHeader(<br>

> +    const RawInstrProf::Header &Header) {<br>

> +  if (swap(Header.Version) != RawInstrProf::Version)<br>

>      return error(instrprof_error::unsupported_version);<br>

><br>

>    CountersDelta = swap(Header.CountersDelta);<br>

>    NamesDelta = swap(Header.NamesDelta);<br>

>    auto DataSize = swap(Header.DataSize);<br>

>    auto CountersSize = swap(Header.CountersSize);<br>

>    auto NamesSize = swap(Header.NamesSize);<br>

><br>

> -  ptrdiff_t DataOffset = sizeof(RawHeader);<br>

> -  ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize;<br>

> +  ptrdiff_t DataOffset = sizeof(RawInstrProf::Header);<br>

> +  ptrdiff_t CountersOffset =<br>

> +      DataOffset + sizeof(RawInstrProf::ProfileData<IntPtrT>) * DataSize;<br>

>    ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize;<br>

>    size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize;<br>

><br>

>    auto *Start = reinterpret_cast<const char *>(&Header);<br>

>    if (Start + ProfileSize > DataBuffer->getBufferEnd())<br>

>      return error(instrprof_error::bad_header);<br>

><br>

> -  Data = reinterpret_cast<const ProfileData *>(Start + DataOffset);<br>

> +  Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(<br>

> +      Start + DataOffset);<br>

>    DataEnd = Data + DataSize;<br>

>    CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);<br>

>    NamesStart = Start + NamesOffset;<br>

> @@ -421,25 +389,30 @@<br>

><br>

>    using namespace support;<br>

><br>

> +  auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur);<br>

> +  Cur += sizeof(IndexedInstrProf::Header);<br>

> +<br>

>    // Check the magic number.<br>

> -  uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur);<br>

> +  uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic);<br>

>    if (Magic != IndexedInstrProf::Magic)<br>

>      return error(instrprof_error::bad_magic);<br>

><br>

>    // Read the version.<br>

> -  FormatVersion = endian::readNext<uint64_t, little, unaligned>(Cur);<br>

> +  FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version);<br>

>    if (FormatVersion > IndexedInstrProf::Version)<br>

>      return error(instrprof_error::unsupported_version);<br>

><br>

>    // Read the maximal function count.<br>

> -  MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur);<br>

> +  MaxFunctionCount =<br>

> +      endian::byte_swap<uint64_t, little>(Header->MaxFunctionCount);<br>

><br>

>    // Read the hash type and start offset.<br>

>    IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(<br>

> -      endian::readNext<uint64_t, little, unaligned>(Cur));<br>

> +      endian::byte_swap<uint64_t, little>(Header->HashType));<br>

>    if (HashType > IndexedInstrProf::HashT::Last)<br>

>      return error(instrprof_error::unsupported_hash_type);<br>

> -  uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);<br>

> +<br>

> +  uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);<br>

><br>

>    // The rest of the file is an on disk hash table.<br>

>    Index.reset(InstrProfReaderIndex::Create(<br>

> Index: lib/ProfileData/InstrProfIndexed.h<br>

> ===================================================================<br>

> --- lib/ProfileData/InstrProfIndexed.h<br>

> +++ /dev/null<br>

> @@ -1,56 +0,0 @@<br>

> -//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=//<br>

> -//<br>

> -//                     The LLVM Compiler Infrastructure<br>

> -//<br>

> -// This file is distributed under the University of Illinois Open Source<br>

> -// License. See LICENSE.TXT for details.<br>

> -//<br>

> -//===----------------------------------------------------------------------===//<br>

> -//<br>

> -// Shared header for the instrumented profile data reader and writer.<br>

> -//<br>

> -//===----------------------------------------------------------------------===//<br>

> -<br>

> -#ifndef LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H<br>

> -#define LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H<br>

> -<br>

> -#include "llvm/Support/Endian.h"<br>

> -#include "llvm/Support/ErrorHandling.h"<br>

> -#include "llvm/Support/MD5.h"<br>

> -<br>

> -namespace llvm {<br>

> -<br>

> -namespace IndexedInstrProf {<br>

> -enum class HashT : uint32_t {<br>

> -  MD5,<br>

> -<br>

> -  Last = MD5<br>

> -};<br>

> -<br>

> -static inline uint64_t MD5Hash(StringRef Str) {<br>

> -  MD5 Hash;<br>

> -  Hash.update(Str);<br>

> -  llvm::MD5::MD5Result Result;<br>

> -  Hash.final(Result);<br>

> -  // Return the least significant 8 bytes. Our MD5 implementation returns the<br>

> -  // result in little endian, so we may need to swap bytes.<br>

> -  using namespace llvm::support;<br>

> -  return endian::read<uint64_t, little, unaligned>(Result);<br>

> -}<br>

> -<br>

> -static inline uint64_t ComputeHash(HashT Type, StringRef K) {<br>

> -  switch (Type) {<br>

> -  case HashT::MD5:<br>

> -    return IndexedInstrProf::MD5Hash(K);<br>

> -  }<br>

> -  llvm_unreachable("Unhandled hash type");<br>

> -}<br>

> -<br>

> -const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"<br>

> -const uint64_t Version = 3;<br>

> -const HashT HashType = HashT::MD5;<br>

> -}<br>

> -<br>

> -} // end namespace llvm<br>

> -<br>

> -#endif<br>

> Index: include/llvm/ProfileData/InstrProfReader.h<br>

> ===================================================================<br>

> --- include/llvm/ProfileData/InstrProfReader.h<br>

> +++ include/llvm/ProfileData/InstrProfReader.h<br>

> @@ -132,28 +132,12 @@<br>

>  private:<br>

>    /// The profile data file contents.<br>

>    std::unique_ptr<MemoryBuffer> DataBuffer;<br>

> -  struct ProfileData {<br>

> -    const uint32_t NameSize;<br>

> -    const uint32_t NumCounters;<br>

> -    const uint64_t FuncHash;<br>

> -    const IntPtrT NamePtr;<br>

> -    const IntPtrT CounterPtr;<br>

> -  };<br>

> -  struct RawHeader {<br>

> -    const uint64_t Magic;<br>

> -    const uint64_t Version;<br>

> -    const uint64_t DataSize;<br>

> -    const uint64_t CountersSize;<br>

> -    const uint64_t NamesSize;<br>

> -    const uint64_t CountersDelta;<br>

> -    const uint64_t NamesDelta;<br>

> -  };<br>

><br>

>    bool ShouldSwapBytes;<br>

>    uint64_t CountersDelta;<br>

>    uint64_t NamesDelta;<br>

> -  const ProfileData *Data;<br>

> -  const ProfileData *DataEnd;<br>

> +  const RawInstrProf::ProfileData<IntPtrT> *Data;<br>

> +  const RawInstrProf::ProfileData<IntPtrT> *DataEnd;<br>

>    const uint64_t *CountersStart;<br>

>    const char *NamesStart;<br>

>    const char *ProfileEnd;<br>

> @@ -170,7 +154,7 @@<br>

><br>

>  private:<br>

>    std::error_code readNextHeader(const char *CurrentPos);<br>

> -  std::error_code readHeader(const RawHeader &Header);<br>

> +  std::error_code readHeader(const RawInstrProf::Header &Header);<br>

>    template <class IntT><br>

>    IntT swap(IntT Int) const {<br>

>      return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;<br>

> Index: include/llvm/ProfileData/InstrProf.h<br>

> ===================================================================<br>

> --- include/llvm/ProfileData/InstrProf.h<br>

> +++ include/llvm/ProfileData/InstrProf.h<br>

> @@ -16,8 +16,11 @@<br>

>  #ifndef LLVM_PROFILEDATA_INSTRPROF_H_<br>

>  #define LLVM_PROFILEDATA_INSTRPROF_H_<br>

><br>

> +#include "llvm/ADT/StringRef.h"<br>

>  #include "llvm/ADT/StringSet.h"<br>

> +#include "llvm/Support/Endian.h"<br>

>  #include "llvm/Support/ErrorHandling.h"<br>

> +#include "llvm/Support/MD5.h"<br>

>  #include <cstdint><br>

>  #include <list><br>

>  #include <system_error><br>

> @@ -132,6 +135,105 @@<br>

>    }<br>

>  };<br>

><br>

> +namespace IndexedInstrProf {<br>

> +enum class HashT : uint32_t {<br>

> +  MD5,<br>

> +<br>

> +  Last = MD5<br>

> +};<br>

> +<br>

> +static inline uint64_t MD5Hash(StringRef Str) {<br>

> +  MD5 Hash;<br>

> +  Hash.update(Str);<br>

> +  llvm::MD5::MD5Result Result;<br>

> +  Hash.final(Result);<br>

> +  // Return the least significant 8 bytes. Our MD5 implementation returns the<br>

> +  // result in little endian, so we may need to swap bytes.<br>

> +  using namespace llvm::support;<br>

> +  return endian::read<uint64_t, little, unaligned>(Result);<br>

> +}<br>

> +<br>

> +static inline uint64_t ComputeHash(HashT Type, StringRef K) {<br>

> +  switch (Type) {<br>

> +    case HashT::MD5:<br>

> +      return IndexedInstrProf::MD5Hash(K);<br>

> +  }<br>

> +  llvm_unreachable("Unhandled hash type");<br>

> +}<br>

> +<br>

> +const uint64_t Magic = 0x8169666f72706cff;  // "\xfflprofi\x81"<br>

> +const uint64_t Version = 3;<br>

> +const HashT HashType = HashT::MD5;<br>

> +<br>

> +struct Header {<br>

> +  uint64_t Magic;<br>

> +  uint64_t Version;<br>

> +  uint64_t MaxFunctionCount;<br>

> +  uint64_t HashType;<br>

> +  uint64_t HashOffset;<br>

> +};<br>

> +<br>

> +}  // end namespace IndexedInstrProf<br>

> +<br>

> +namespace RawInstrProf {<br>

> +<br>

> +const uint64_t Version = 1;<br>

> +<br>

> +// Magic number to detect file format and endianness.<br>

> +// Use 255 at one end, since no UTF-8 file can use that character.  Avoid 0,<br>

> +// so that utilities, like strings, don't grab it as a string.  129 is also<br>

> +// invalid UTF-8, and high enough to be interesting.<br>

> +// Use "lprofr" in the centre to stand for "LLVM Profile Raw", or "lprofR"<br>

> +// for 32-bit platforms.<br>

> +// The magic and version need to be kept in sync with<br>

> +// projects/compiler-rt/lib/profile/InstrProfiling.c<br>

> +<br>

> +template <class IntPtrT><br>

> +static uint64_t getMagic();<br>

> +template <><br>

> +uint64_t getMagic<uint64_t>() {<br>

> +  return uint64_t(255) << 56 | uint64_t('l') << 48 | uint64_t('p') << 40 |<br>

> +         uint64_t('r') << 32 | uint64_t('o') << 24 | uint64_t('f') << 16 |<br>

> +         uint64_t('r') << 8 | uint64_t(129);<br>

> +}<br>

> +<br>

> +template <><br>

> +uint64_t getMagic<uint32_t>() {<br>

> +  return uint64_t(255) << 56 | uint64_t('l') << 48 | uint64_t('p') << 40 |<br>

> +         uint64_t('r') << 32 | uint64_t('o') << 24 | uint64_t('f') << 16 |<br>

> +         uint64_t('R') << 8 | uint64_t(129);<br>

> +}<br>

> +<br>

> +// The definition should match the structure defined in<br>

> +// compiler-rt/lib/profile/InstrProfiling.h.<br>

> +// It should also match the synthesized type in<br>

> +// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters.<br>

> +<br>

> +template <class IntPtrT><br>

> +struct ProfileData {<br>

> +  const uint32_t NameSize;<br>

> +  const uint32_t NumCounters;<br>

> +  const uint64_t FuncHash;<br>

> +  const IntPtrT NamePtr;<br>

> +  const IntPtrT CounterPtr;<br>

> +};<br>

> +<br>

> +// The definition should match the header referenced in<br>

> +// compiler-rt/lib/profile/InstrProfilingFile.c  and<br>

> +// InstrProfilingBuffer.c.<br>

> +<br>

> +struct Header {<br>

> +  const uint64_t Magic;<br>

> +  const uint64_t Version;<br>

> +  const uint64_t DataSize;<br>

> +  const uint64_t CountersSize;<br>

> +  const uint64_t NamesSize;<br>

> +  const uint64_t CountersDelta;<br>

> +  const uint64_t NamesDelta;<br>

> +};<br>

> +<br>

> +}  // end namespace RawInstrProf<br>

> +<br>

>  } // end namespace llvm<br>

><br>

>  namespace std {<br>

</blockquote></div><br></div>