<div dir="ltr">Fixed the comment and committed the patch.<div><br></div><div>David</div></div><div class="gmail_extra"><br><div class="gmail_quote">On Fri, Oct 16, 2015 at 7:28 PM, Justin Bogner <span dir="ltr"><<a href="mailto:mail@justinbogner.com" target="_blank">mail@justinbogner.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><span class="">David Li <<a href="mailto:davidxl@google.com">davidxl@google.com</a>> writes:<br>
> davidxl created this revision.<br>
> davidxl added a reviewer: bogner.<br>
> davidxl added a subscriber: llvm-commits.<br>
><br>
> In this clean up patch,<br>
> 1) key constant values (version, magic) and data structures related to<br>
> raw and indexed profile format are moved into one centralized place:<br>
> InstrProf.h.<br>
> 2) Utility function such as MD5Hash computation is also moved to the<br>
> common header to allow sharing by other components in the future<br>
> 3) A header data structure is introduced for Indexed format so that<br>
> the reader and writer can always be in sync<br>
> 4) Added some comments to document different places where multiple<br>
> definition of the data structure must be kept in sync (reader/writer,<br>
> runtime, lowering etc).<br>
<br>
</span>One minor comment below, then this LGTM.<br>
<span class=""><br>
> No functional change is intended.<br>
><br>
> <a href="http://reviews.llvm.org/D13758" rel="noreferrer" target="_blank">http://reviews.llvm.org/D13758</a><br>
><br>
> Files:<br>
> include/llvm/ProfileData/InstrProf.h<br>
> include/llvm/ProfileData/InstrProfReader.h<br>
> lib/ProfileData/InstrProfIndexed.h<br>
> lib/ProfileData/InstrProfReader.cpp<br>
> lib/ProfileData/InstrProfWriter.cpp<br>
><br>
</span>> Index: lib/ProfileData/InstrProfWriter.cpp<br>
> ===================================================================<br>
> --- lib/ProfileData/InstrProfWriter.cpp<br>
> +++ lib/ProfileData/InstrProfWriter.cpp<br>
> @@ -13,7 +13,6 @@<br>
> //===----------------------------------------------------------------------===//<br>
><br>
> #include "llvm/ProfileData/InstrProfWriter.h"<br>
> -#include "InstrProfIndexed.h"<br>
> #include "llvm/ADT/StringExtras.h"<br>
> #include "llvm/Support/EndianStream.h"<br>
> #include "llvm/Support/OnDiskHashTable.h"<br>
> @@ -197,10 +196,17 @@<br>
> endian::Writer<little> LE(OS);<br>
><br>
> // Write the header.<br>
> - LE.write<uint64_t>(IndexedInstrProf::Magic);<br>
> - LE.write<uint64_t>(IndexedInstrProf::Version);<br>
> - LE.write<uint64_t>(MaxFunctionCount);<br>
> - LE.write<uint64_t>(static_cast<uint64_t>(IndexedInstrProf::HashType));<br>
> + IndexedInstrProf::Header Header;<br>
> + Header.Magic = IndexedInstrProf::Magic;<br>
> + Header.Version = IndexedInstrProf::Version;<br>
> + Header.MaxFunctionCount = MaxFunctionCount;<br>
> + Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);<br>
> + Header.HashOffset = 0;<br>
> + int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);<br>
> +<br>
> + // Only write out the first N-1 fields.<br>
<br>
This comment isn't helpful - that's apparent from the next line.<br>
Something like "Print everything except the HashOffset" is a little<br>
better.<br>
<br>
> + for (int I = 0; I < N - 1; I++)<br>
> + LE.write<uint64_t>(reinterpret_cast<uint64_t *>(&Header)[I]);<br>
><br>
> // Save a space to write the hash table start location.<br>
> uint64_t HashTableStartLoc = OS.tell();<br>
> Index: lib/ProfileData/InstrProfReader.cpp<br>
> ===================================================================<br>
> --- lib/ProfileData/InstrProfReader.cpp<br>
> +++ lib/ProfileData/InstrProfReader.cpp<br>
> @@ -13,7 +13,6 @@<br>
> //===----------------------------------------------------------------------===//<br>
><br>
> #include "llvm/ProfileData/InstrProfReader.h"<br>
> -#include "InstrProfIndexed.h"<br>
> #include "llvm/ADT/STLExtras.h"<br>
> #include <cassert><br>
><br>
> @@ -140,53 +139,24 @@<br>
> }<br>
><br>
> template <class IntPtrT><br>
> -static uint64_t getRawMagic();<br>
> -<br>
> -template <><br>
> -uint64_t getRawMagic<uint64_t>() {<br>
> - return<br>
> - uint64_t(255) << 56 |<br>
> - uint64_t('l') << 48 |<br>
> - uint64_t('p') << 40 |<br>
> - uint64_t('r') << 32 |<br>
> - uint64_t('o') << 24 |<br>
> - uint64_t('f') << 16 |<br>
> - uint64_t('r') << 8 |<br>
> - uint64_t(129);<br>
> -}<br>
> -<br>
> -template <><br>
> -uint64_t getRawMagic<uint32_t>() {<br>
> - return<br>
> - uint64_t(255) << 56 |<br>
> - uint64_t('l') << 48 |<br>
> - uint64_t('p') << 40 |<br>
> - uint64_t('r') << 32 |<br>
> - uint64_t('o') << 24 |<br>
> - uint64_t('f') << 16 |<br>
> - uint64_t('R') << 8 |<br>
> - uint64_t(129);<br>
> -}<br>
> -<br>
> -template <class IntPtrT><br>
> bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {<br>
> if (DataBuffer.getBufferSize() < sizeof(uint64_t))<br>
> return false;<br>
> uint64_t Magic =<br>
> *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());<br>
> - return getRawMagic<IntPtrT>() == Magic ||<br>
> - sys::getSwappedBytes(getRawMagic<IntPtrT>()) == Magic;<br>
> + return RawInstrProf::getMagic<IntPtrT>() == Magic ||<br>
> + sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic;<br>
> }<br>
><br>
> template <class IntPtrT><br>
> std::error_code RawInstrProfReader<IntPtrT>::readHeader() {<br>
> if (!hasFormat(*DataBuffer))<br>
> return error(instrprof_error::bad_magic);<br>
> - if (DataBuffer->getBufferSize() < sizeof(RawHeader))<br>
> + if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))<br>
> return error(instrprof_error::bad_header);<br>
> - auto *Header =<br>
> - reinterpret_cast<const RawHeader *>(DataBuffer->getBufferStart());<br>
> - ShouldSwapBytes = Header->Magic != getRawMagic<IntPtrT>();<br>
> + auto *Header = reinterpret_cast<const RawInstrProf::Header *>(<br>
> + DataBuffer->getBufferStart());<br>
> + ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();<br>
> return readHeader(*Header);<br>
> }<br>
><br>
> @@ -202,47 +172,45 @@<br>
> return instrprof_error::eof;<br>
> // If there isn't enough space for another header, this is probably just<br>
> // garbage at the end of the file.<br>
> - if (CurrentPos + sizeof(RawHeader) > End)<br>
> + if (CurrentPos + sizeof(RawInstrProf::Header) > End)<br>
> return instrprof_error::malformed;<br>
> // The writer ensures each profile is padded to start at an aligned address.<br>
> if (reinterpret_cast<size_t>(CurrentPos) % alignOf<uint64_t>())<br>
> return instrprof_error::malformed;<br>
> // The magic should have the same byte order as in the previous header.<br>
> uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);<br>
> - if (Magic != swap(getRawMagic<IntPtrT>()))<br>
> + if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))<br>
> return instrprof_error::bad_magic;<br>
><br>
> // There's another profile to read, so we need to process the header.<br>
> - auto *Header = reinterpret_cast<const RawHeader *>(CurrentPos);<br>
> + auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);<br>
> return readHeader(*Header);<br>
> }<br>
><br>
> -static uint64_t getRawVersion() {<br>
> - return 1;<br>
> -}<br>
> -<br>
> template <class IntPtrT><br>
> -std::error_code<br>
> -RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) {<br>
> - if (swap(Header.Version) != getRawVersion())<br>
> +std::error_code RawInstrProfReader<IntPtrT>::readHeader(<br>
> + const RawInstrProf::Header &Header) {<br>
> + if (swap(Header.Version) != RawInstrProf::Version)<br>
> return error(instrprof_error::unsupported_version);<br>
><br>
> CountersDelta = swap(Header.CountersDelta);<br>
> NamesDelta = swap(Header.NamesDelta);<br>
> auto DataSize = swap(Header.DataSize);<br>
> auto CountersSize = swap(Header.CountersSize);<br>
> auto NamesSize = swap(Header.NamesSize);<br>
><br>
> - ptrdiff_t DataOffset = sizeof(RawHeader);<br>
> - ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize;<br>
> + ptrdiff_t DataOffset = sizeof(RawInstrProf::Header);<br>
> + ptrdiff_t CountersOffset =<br>
> + DataOffset + sizeof(RawInstrProf::ProfileData<IntPtrT>) * DataSize;<br>
> ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize;<br>
> size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize;<br>
><br>
> auto *Start = reinterpret_cast<const char *>(&Header);<br>
> if (Start + ProfileSize > DataBuffer->getBufferEnd())<br>
> return error(instrprof_error::bad_header);<br>
><br>
> - Data = reinterpret_cast<const ProfileData *>(Start + DataOffset);<br>
> + Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(<br>
> + Start + DataOffset);<br>
> DataEnd = Data + DataSize;<br>
> CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);<br>
> NamesStart = Start + NamesOffset;<br>
> @@ -421,25 +389,30 @@<br>
><br>
> using namespace support;<br>
><br>
> + auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur);<br>
> + Cur += sizeof(IndexedInstrProf::Header);<br>
> +<br>
> // Check the magic number.<br>
> - uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur);<br>
> + uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic);<br>
> if (Magic != IndexedInstrProf::Magic)<br>
> return error(instrprof_error::bad_magic);<br>
><br>
> // Read the version.<br>
> - FormatVersion = endian::readNext<uint64_t, little, unaligned>(Cur);<br>
> + FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version);<br>
> if (FormatVersion > IndexedInstrProf::Version)<br>
> return error(instrprof_error::unsupported_version);<br>
><br>
> // Read the maximal function count.<br>
> - MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur);<br>
> + MaxFunctionCount =<br>
> + endian::byte_swap<uint64_t, little>(Header->MaxFunctionCount);<br>
><br>
> // Read the hash type and start offset.<br>
> IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(<br>
> - endian::readNext<uint64_t, little, unaligned>(Cur));<br>
> + endian::byte_swap<uint64_t, little>(Header->HashType));<br>
> if (HashType > IndexedInstrProf::HashT::Last)<br>
> return error(instrprof_error::unsupported_hash_type);<br>
> - uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);<br>
> +<br>
> + uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);<br>
><br>
> // The rest of the file is an on disk hash table.<br>
> Index.reset(InstrProfReaderIndex::Create(<br>
> Index: lib/ProfileData/InstrProfIndexed.h<br>
> ===================================================================<br>
> --- lib/ProfileData/InstrProfIndexed.h<br>
> +++ /dev/null<br>
> @@ -1,56 +0,0 @@<br>
> -//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=//<br>
> -//<br>
> -// The LLVM Compiler Infrastructure<br>
> -//<br>
> -// This file is distributed under the University of Illinois Open Source<br>
> -// License. See LICENSE.TXT for details.<br>
> -//<br>
> -//===----------------------------------------------------------------------===//<br>
> -//<br>
> -// Shared header for the instrumented profile data reader and writer.<br>
> -//<br>
> -//===----------------------------------------------------------------------===//<br>
> -<br>
> -#ifndef LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H<br>
> -#define LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H<br>
> -<br>
> -#include "llvm/Support/Endian.h"<br>
> -#include "llvm/Support/ErrorHandling.h"<br>
> -#include "llvm/Support/MD5.h"<br>
> -<br>
> -namespace llvm {<br>
> -<br>
> -namespace IndexedInstrProf {<br>
> -enum class HashT : uint32_t {<br>
> - MD5,<br>
> -<br>
> - Last = MD5<br>
> -};<br>
> -<br>
> -static inline uint64_t MD5Hash(StringRef Str) {<br>
> - MD5 Hash;<br>
> - Hash.update(Str);<br>
> - llvm::MD5::MD5Result Result;<br>
> - Hash.final(Result);<br>
> - // Return the least significant 8 bytes. Our MD5 implementation returns the<br>
> - // result in little endian, so we may need to swap bytes.<br>
> - using namespace llvm::support;<br>
> - return endian::read<uint64_t, little, unaligned>(Result);<br>
> -}<br>
> -<br>
> -static inline uint64_t ComputeHash(HashT Type, StringRef K) {<br>
> - switch (Type) {<br>
> - case HashT::MD5:<br>
> - return IndexedInstrProf::MD5Hash(K);<br>
> - }<br>
> - llvm_unreachable("Unhandled hash type");<br>
> -}<br>
> -<br>
> -const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"<br>
> -const uint64_t Version = 3;<br>
> -const HashT HashType = HashT::MD5;<br>
> -}<br>
> -<br>
> -} // end namespace llvm<br>
> -<br>
> -#endif<br>
> Index: include/llvm/ProfileData/InstrProfReader.h<br>
> ===================================================================<br>
> --- include/llvm/ProfileData/InstrProfReader.h<br>
> +++ include/llvm/ProfileData/InstrProfReader.h<br>
> @@ -132,28 +132,12 @@<br>
> private:<br>
> /// The profile data file contents.<br>
> std::unique_ptr<MemoryBuffer> DataBuffer;<br>
> - struct ProfileData {<br>
> - const uint32_t NameSize;<br>
> - const uint32_t NumCounters;<br>
> - const uint64_t FuncHash;<br>
> - const IntPtrT NamePtr;<br>
> - const IntPtrT CounterPtr;<br>
> - };<br>
> - struct RawHeader {<br>
> - const uint64_t Magic;<br>
> - const uint64_t Version;<br>
> - const uint64_t DataSize;<br>
> - const uint64_t CountersSize;<br>
> - const uint64_t NamesSize;<br>
> - const uint64_t CountersDelta;<br>
> - const uint64_t NamesDelta;<br>
> - };<br>
><br>
> bool ShouldSwapBytes;<br>
> uint64_t CountersDelta;<br>
> uint64_t NamesDelta;<br>
> - const ProfileData *Data;<br>
> - const ProfileData *DataEnd;<br>
> + const RawInstrProf::ProfileData<IntPtrT> *Data;<br>
> + const RawInstrProf::ProfileData<IntPtrT> *DataEnd;<br>
> const uint64_t *CountersStart;<br>
> const char *NamesStart;<br>
> const char *ProfileEnd;<br>
> @@ -170,7 +154,7 @@<br>
><br>
> private:<br>
> std::error_code readNextHeader(const char *CurrentPos);<br>
> - std::error_code readHeader(const RawHeader &Header);<br>
> + std::error_code readHeader(const RawInstrProf::Header &Header);<br>
> template <class IntT><br>
> IntT swap(IntT Int) const {<br>
> return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;<br>
> Index: include/llvm/ProfileData/InstrProf.h<br>
> ===================================================================<br>
> --- include/llvm/ProfileData/InstrProf.h<br>
> +++ include/llvm/ProfileData/InstrProf.h<br>
> @@ -16,8 +16,11 @@<br>
> #ifndef LLVM_PROFILEDATA_INSTRPROF_H_<br>
> #define LLVM_PROFILEDATA_INSTRPROF_H_<br>
><br>
> +#include "llvm/ADT/StringRef.h"<br>
> #include "llvm/ADT/StringSet.h"<br>
> +#include "llvm/Support/Endian.h"<br>
> #include "llvm/Support/ErrorHandling.h"<br>
> +#include "llvm/Support/MD5.h"<br>
> #include <cstdint><br>
> #include <list><br>
> #include <system_error><br>
> @@ -132,6 +135,105 @@<br>
> }<br>
> };<br>
><br>
> +namespace IndexedInstrProf {<br>
> +enum class HashT : uint32_t {<br>
> + MD5,<br>
> +<br>
> + Last = MD5<br>
> +};<br>
> +<br>
> +static inline uint64_t MD5Hash(StringRef Str) {<br>
> + MD5 Hash;<br>
> + Hash.update(Str);<br>
> + llvm::MD5::MD5Result Result;<br>
> + Hash.final(Result);<br>
> + // Return the least significant 8 bytes. Our MD5 implementation returns the<br>
> + // result in little endian, so we may need to swap bytes.<br>
> + using namespace llvm::support;<br>
> + return endian::read<uint64_t, little, unaligned>(Result);<br>
> +}<br>
> +<br>
> +static inline uint64_t ComputeHash(HashT Type, StringRef K) {<br>
> + switch (Type) {<br>
> + case HashT::MD5:<br>
> + return IndexedInstrProf::MD5Hash(K);<br>
> + }<br>
> + llvm_unreachable("Unhandled hash type");<br>
> +}<br>
> +<br>
> +const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"<br>
> +const uint64_t Version = 3;<br>
> +const HashT HashType = HashT::MD5;<br>
> +<br>
> +struct Header {<br>
> + uint64_t Magic;<br>
> + uint64_t Version;<br>
> + uint64_t MaxFunctionCount;<br>
> + uint64_t HashType;<br>
> + uint64_t HashOffset;<br>
> +};<br>
> +<br>
> +} // end namespace IndexedInstrProf<br>
> +<br>
> +namespace RawInstrProf {<br>
> +<br>
> +const uint64_t Version = 1;<br>
> +<br>
> +// Magic number to detect file format and endianness.<br>
> +// Use 255 at one end, since no UTF-8 file can use that character. Avoid 0,<br>
> +// so that utilities, like strings, don't grab it as a string. 129 is also<br>
> +// invalid UTF-8, and high enough to be interesting.<br>
> +// Use "lprofr" in the centre to stand for "LLVM Profile Raw", or "lprofR"<br>
> +// for 32-bit platforms.<br>
> +// The magic and version need to be kept in sync with<br>
> +// projects/compiler-rt/lib/profile/InstrProfiling.c<br>
> +<br>
> +template <class IntPtrT><br>
> +static uint64_t getMagic();<br>
> +template <><br>
> +uint64_t getMagic<uint64_t>() {<br>
> + return uint64_t(255) << 56 | uint64_t('l') << 48 | uint64_t('p') << 40 |<br>
> + uint64_t('r') << 32 | uint64_t('o') << 24 | uint64_t('f') << 16 |<br>
> + uint64_t('r') << 8 | uint64_t(129);<br>
> +}<br>
> +<br>
> +template <><br>
> +uint64_t getMagic<uint32_t>() {<br>
> + return uint64_t(255) << 56 | uint64_t('l') << 48 | uint64_t('p') << 40 |<br>
> + uint64_t('r') << 32 | uint64_t('o') << 24 | uint64_t('f') << 16 |<br>
> + uint64_t('R') << 8 | uint64_t(129);<br>
> +}<br>
> +<br>
> +// The definition should match the structure defined in<br>
> +// compiler-rt/lib/profile/InstrProfiling.h.<br>
> +// It should also match the synthesized type in<br>
> +// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters.<br>
> +<br>
> +template <class IntPtrT><br>
> +struct ProfileData {<br>
> + const uint32_t NameSize;<br>
> + const uint32_t NumCounters;<br>
> + const uint64_t FuncHash;<br>
> + const IntPtrT NamePtr;<br>
> + const IntPtrT CounterPtr;<br>
> +};<br>
> +<br>
> +// The definition should match the header referenced in<br>
> +// compiler-rt/lib/profile/InstrProfilingFile.c and<br>
> +// InstrProfilingBuffer.c.<br>
> +<br>
> +struct Header {<br>
> + const uint64_t Magic;<br>
> + const uint64_t Version;<br>
> + const uint64_t DataSize;<br>
> + const uint64_t CountersSize;<br>
> + const uint64_t NamesSize;<br>
> + const uint64_t CountersDelta;<br>
> + const uint64_t NamesDelta;<br>
> +};<br>
> +<br>
> +} // end namespace RawInstrProf<br>
> +<br>
> } // end namespace llvm<br>
><br>
> namespace std {<br>
</blockquote></div><br></div>