[PATCH] D13758: Instr PGO code small restructuring
Justin Bogner via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 16 19:28:16 PDT 2015
David Li <davidxl at google.com> writes:
> davidxl created this revision.
> davidxl added a reviewer: bogner.
> davidxl added a subscriber: llvm-commits.
>
> In this clean up patch,
> 1) key constant values (version, magic) and data structures related to
> raw and indexed profile format are moved into one centralized place:
> InstrProf.h.
> 2) Utility function such as MD5Hash computation is also moved to the
> common header to allow sharing by other components in the future
> 3) A header data structure is introduced for Indexed format so that
> the reader and writer can always be in sync
> 4) Added some comments to document different places where multiple
> definition of the data structure must be kept in sync (reader/writer,
> runtime, lowering etc).
One minor comment below, then this LGTM.
> No functional change is intended.
>
> http://reviews.llvm.org/D13758
>
> Files:
> include/llvm/ProfileData/InstrProf.h
> include/llvm/ProfileData/InstrProfReader.h
> lib/ProfileData/InstrProfIndexed.h
> lib/ProfileData/InstrProfReader.cpp
> lib/ProfileData/InstrProfWriter.cpp
>
> Index: lib/ProfileData/InstrProfWriter.cpp
> ===================================================================
> --- lib/ProfileData/InstrProfWriter.cpp
> +++ lib/ProfileData/InstrProfWriter.cpp
> @@ -13,7 +13,6 @@
> //===----------------------------------------------------------------------===//
>
> #include "llvm/ProfileData/InstrProfWriter.h"
> -#include "InstrProfIndexed.h"
> #include "llvm/ADT/StringExtras.h"
> #include "llvm/Support/EndianStream.h"
> #include "llvm/Support/OnDiskHashTable.h"
> @@ -197,10 +196,17 @@
> endian::Writer<little> LE(OS);
>
> // Write the header.
> - LE.write<uint64_t>(IndexedInstrProf::Magic);
> - LE.write<uint64_t>(IndexedInstrProf::Version);
> - LE.write<uint64_t>(MaxFunctionCount);
> - LE.write<uint64_t>(static_cast<uint64_t>(IndexedInstrProf::HashType));
> + IndexedInstrProf::Header Header;
> + Header.Magic = IndexedInstrProf::Magic;
> + Header.Version = IndexedInstrProf::Version;
> + Header.MaxFunctionCount = MaxFunctionCount;
> + Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
> + Header.HashOffset = 0;
> + int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);
> +
> + // Only write out the first N-1 fields.
This comment isn't helpful - that's apparent from the next line.
Something like "Print everything except the HashOffset" is a little
better.
> + for (int I = 0; I < N - 1; I++)
> + LE.write<uint64_t>(reinterpret_cast<uint64_t *>(&Header)[I]);
>
> // Save a space to write the hash table start location.
> uint64_t HashTableStartLoc = OS.tell();
> Index: lib/ProfileData/InstrProfReader.cpp
> ===================================================================
> --- lib/ProfileData/InstrProfReader.cpp
> +++ lib/ProfileData/InstrProfReader.cpp
> @@ -13,7 +13,6 @@
> //===----------------------------------------------------------------------===//
>
> #include "llvm/ProfileData/InstrProfReader.h"
> -#include "InstrProfIndexed.h"
> #include "llvm/ADT/STLExtras.h"
> #include <cassert>
>
> @@ -140,53 +139,24 @@
> }
>
> template <class IntPtrT>
> -static uint64_t getRawMagic();
> -
> -template <>
> -uint64_t getRawMagic<uint64_t>() {
> - return
> - uint64_t(255) << 56 |
> - uint64_t('l') << 48 |
> - uint64_t('p') << 40 |
> - uint64_t('r') << 32 |
> - uint64_t('o') << 24 |
> - uint64_t('f') << 16 |
> - uint64_t('r') << 8 |
> - uint64_t(129);
> -}
> -
> -template <>
> -uint64_t getRawMagic<uint32_t>() {
> - return
> - uint64_t(255) << 56 |
> - uint64_t('l') << 48 |
> - uint64_t('p') << 40 |
> - uint64_t('r') << 32 |
> - uint64_t('o') << 24 |
> - uint64_t('f') << 16 |
> - uint64_t('R') << 8 |
> - uint64_t(129);
> -}
> -
> -template <class IntPtrT>
> bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
> if (DataBuffer.getBufferSize() < sizeof(uint64_t))
> return false;
> uint64_t Magic =
> *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
> - return getRawMagic<IntPtrT>() == Magic ||
> - sys::getSwappedBytes(getRawMagic<IntPtrT>()) == Magic;
> + return RawInstrProf::getMagic<IntPtrT>() == Magic ||
> + sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic;
> }
>
> template <class IntPtrT>
> std::error_code RawInstrProfReader<IntPtrT>::readHeader() {
> if (!hasFormat(*DataBuffer))
> return error(instrprof_error::bad_magic);
> - if (DataBuffer->getBufferSize() < sizeof(RawHeader))
> + if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))
> return error(instrprof_error::bad_header);
> - auto *Header =
> - reinterpret_cast<const RawHeader *>(DataBuffer->getBufferStart());
> - ShouldSwapBytes = Header->Magic != getRawMagic<IntPtrT>();
> + auto *Header = reinterpret_cast<const RawInstrProf::Header *>(
> + DataBuffer->getBufferStart());
> + ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();
> return readHeader(*Header);
> }
>
> @@ -202,47 +172,45 @@
> return instrprof_error::eof;
> // If there isn't enough space for another header, this is probably just
> // garbage at the end of the file.
> - if (CurrentPos + sizeof(RawHeader) > End)
> + if (CurrentPos + sizeof(RawInstrProf::Header) > End)
> return instrprof_error::malformed;
> // The writer ensures each profile is padded to start at an aligned address.
> if (reinterpret_cast<size_t>(CurrentPos) % alignOf<uint64_t>())
> return instrprof_error::malformed;
> // The magic should have the same byte order as in the previous header.
> uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
> - if (Magic != swap(getRawMagic<IntPtrT>()))
> + if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
> return instrprof_error::bad_magic;
>
> // There's another profile to read, so we need to process the header.
> - auto *Header = reinterpret_cast<const RawHeader *>(CurrentPos);
> + auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);
> return readHeader(*Header);
> }
>
> -static uint64_t getRawVersion() {
> - return 1;
> -}
> -
> template <class IntPtrT>
> -std::error_code
> -RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) {
> - if (swap(Header.Version) != getRawVersion())
> +std::error_code RawInstrProfReader<IntPtrT>::readHeader(
> + const RawInstrProf::Header &Header) {
> + if (swap(Header.Version) != RawInstrProf::Version)
> return error(instrprof_error::unsupported_version);
>
> CountersDelta = swap(Header.CountersDelta);
> NamesDelta = swap(Header.NamesDelta);
> auto DataSize = swap(Header.DataSize);
> auto CountersSize = swap(Header.CountersSize);
> auto NamesSize = swap(Header.NamesSize);
>
> - ptrdiff_t DataOffset = sizeof(RawHeader);
> - ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize;
> + ptrdiff_t DataOffset = sizeof(RawInstrProf::Header);
> + ptrdiff_t CountersOffset =
> + DataOffset + sizeof(RawInstrProf::ProfileData<IntPtrT>) * DataSize;
> ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize;
> size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize;
>
> auto *Start = reinterpret_cast<const char *>(&Header);
> if (Start + ProfileSize > DataBuffer->getBufferEnd())
> return error(instrprof_error::bad_header);
>
> - Data = reinterpret_cast<const ProfileData *>(Start + DataOffset);
> + Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
> + Start + DataOffset);
> DataEnd = Data + DataSize;
> CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);
> NamesStart = Start + NamesOffset;
> @@ -421,25 +389,30 @@
>
> using namespace support;
>
> + auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur);
> + Cur += sizeof(IndexedInstrProf::Header);
> +
> // Check the magic number.
> - uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur);
> + uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic);
> if (Magic != IndexedInstrProf::Magic)
> return error(instrprof_error::bad_magic);
>
> // Read the version.
> - FormatVersion = endian::readNext<uint64_t, little, unaligned>(Cur);
> + FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version);
> if (FormatVersion > IndexedInstrProf::Version)
> return error(instrprof_error::unsupported_version);
>
> // Read the maximal function count.
> - MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur);
> + MaxFunctionCount =
> + endian::byte_swap<uint64_t, little>(Header->MaxFunctionCount);
>
> // Read the hash type and start offset.
> IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
> - endian::readNext<uint64_t, little, unaligned>(Cur));
> + endian::byte_swap<uint64_t, little>(Header->HashType));
> if (HashType > IndexedInstrProf::HashT::Last)
> return error(instrprof_error::unsupported_hash_type);
> - uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
> +
> + uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
>
> // The rest of the file is an on disk hash table.
> Index.reset(InstrProfReaderIndex::Create(
> Index: lib/ProfileData/InstrProfIndexed.h
> ===================================================================
> --- lib/ProfileData/InstrProfIndexed.h
> +++ /dev/null
> @@ -1,56 +0,0 @@
> -//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=//
> -//
> -// The LLVM Compiler Infrastructure
> -//
> -// This file is distributed under the University of Illinois Open Source
> -// License. See LICENSE.TXT for details.
> -//
> -//===----------------------------------------------------------------------===//
> -//
> -// Shared header for the instrumented profile data reader and writer.
> -//
> -//===----------------------------------------------------------------------===//
> -
> -#ifndef LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H
> -#define LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H
> -
> -#include "llvm/Support/Endian.h"
> -#include "llvm/Support/ErrorHandling.h"
> -#include "llvm/Support/MD5.h"
> -
> -namespace llvm {
> -
> -namespace IndexedInstrProf {
> -enum class HashT : uint32_t {
> - MD5,
> -
> - Last = MD5
> -};
> -
> -static inline uint64_t MD5Hash(StringRef Str) {
> - MD5 Hash;
> - Hash.update(Str);
> - llvm::MD5::MD5Result Result;
> - Hash.final(Result);
> - // Return the least significant 8 bytes. Our MD5 implementation returns the
> - // result in little endian, so we may need to swap bytes.
> - using namespace llvm::support;
> - return endian::read<uint64_t, little, unaligned>(Result);
> -}
> -
> -static inline uint64_t ComputeHash(HashT Type, StringRef K) {
> - switch (Type) {
> - case HashT::MD5:
> - return IndexedInstrProf::MD5Hash(K);
> - }
> - llvm_unreachable("Unhandled hash type");
> -}
> -
> -const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
> -const uint64_t Version = 3;
> -const HashT HashType = HashT::MD5;
> -}
> -
> -} // end namespace llvm
> -
> -#endif
> Index: include/llvm/ProfileData/InstrProfReader.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProfReader.h
> +++ include/llvm/ProfileData/InstrProfReader.h
> @@ -132,28 +132,12 @@
> private:
> /// The profile data file contents.
> std::unique_ptr<MemoryBuffer> DataBuffer;
> - struct ProfileData {
> - const uint32_t NameSize;
> - const uint32_t NumCounters;
> - const uint64_t FuncHash;
> - const IntPtrT NamePtr;
> - const IntPtrT CounterPtr;
> - };
> - struct RawHeader {
> - const uint64_t Magic;
> - const uint64_t Version;
> - const uint64_t DataSize;
> - const uint64_t CountersSize;
> - const uint64_t NamesSize;
> - const uint64_t CountersDelta;
> - const uint64_t NamesDelta;
> - };
>
> bool ShouldSwapBytes;
> uint64_t CountersDelta;
> uint64_t NamesDelta;
> - const ProfileData *Data;
> - const ProfileData *DataEnd;
> + const RawInstrProf::ProfileData<IntPtrT> *Data;
> + const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
> const uint64_t *CountersStart;
> const char *NamesStart;
> const char *ProfileEnd;
> @@ -170,7 +154,7 @@
>
> private:
> std::error_code readNextHeader(const char *CurrentPos);
> - std::error_code readHeader(const RawHeader &Header);
> + std::error_code readHeader(const RawInstrProf::Header &Header);
> template <class IntT>
> IntT swap(IntT Int) const {
> return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
> Index: include/llvm/ProfileData/InstrProf.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProf.h
> +++ include/llvm/ProfileData/InstrProf.h
> @@ -16,8 +16,11 @@
> #ifndef LLVM_PROFILEDATA_INSTRPROF_H_
> #define LLVM_PROFILEDATA_INSTRPROF_H_
>
> +#include "llvm/ADT/StringRef.h"
> #include "llvm/ADT/StringSet.h"
> +#include "llvm/Support/Endian.h"
> #include "llvm/Support/ErrorHandling.h"
> +#include "llvm/Support/MD5.h"
> #include <cstdint>
> #include <list>
> #include <system_error>
> @@ -132,6 +135,105 @@
> }
> };
>
> +namespace IndexedInstrProf {
> +enum class HashT : uint32_t {
> + MD5,
> +
> + Last = MD5
> +};
> +
> +static inline uint64_t MD5Hash(StringRef Str) {
> + MD5 Hash;
> + Hash.update(Str);
> + llvm::MD5::MD5Result Result;
> + Hash.final(Result);
> + // Return the least significant 8 bytes. Our MD5 implementation returns the
> + // result in little endian, so we may need to swap bytes.
> + using namespace llvm::support;
> + return endian::read<uint64_t, little, unaligned>(Result);
> +}
> +
> +static inline uint64_t ComputeHash(HashT Type, StringRef K) {
> + switch (Type) {
> + case HashT::MD5:
> + return IndexedInstrProf::MD5Hash(K);
> + }
> + llvm_unreachable("Unhandled hash type");
> +}
> +
> +const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
> +const uint64_t Version = 3;
> +const HashT HashType = HashT::MD5;
> +
> +struct Header {
> + uint64_t Magic;
> + uint64_t Version;
> + uint64_t MaxFunctionCount;
> + uint64_t HashType;
> + uint64_t HashOffset;
> +};
> +
> +} // end namespace IndexedInstrProf
> +
> +namespace RawInstrProf {
> +
> +const uint64_t Version = 1;
> +
> +// Magic number to detect file format and endianness.
> +// Use 255 at one end, since no UTF-8 file can use that character. Avoid 0,
> +// so that utilities, like strings, don't grab it as a string. 129 is also
> +// invalid UTF-8, and high enough to be interesting.
> +// Use "lprofr" in the centre to stand for "LLVM Profile Raw", or "lprofR"
> +// for 32-bit platforms.
> +// The magic and version need to be kept in sync with
> +// projects/compiler-rt/lib/profile/InstrProfiling.c
> +
> +template <class IntPtrT>
> +static uint64_t getMagic();
> +template <>
> +uint64_t getMagic<uint64_t>() {
> + return uint64_t(255) << 56 | uint64_t('l') << 48 | uint64_t('p') << 40 |
> + uint64_t('r') << 32 | uint64_t('o') << 24 | uint64_t('f') << 16 |
> + uint64_t('r') << 8 | uint64_t(129);
> +}
> +
> +template <>
> +uint64_t getMagic<uint32_t>() {
> + return uint64_t(255) << 56 | uint64_t('l') << 48 | uint64_t('p') << 40 |
> + uint64_t('r') << 32 | uint64_t('o') << 24 | uint64_t('f') << 16 |
> + uint64_t('R') << 8 | uint64_t(129);
> +}
> +
> +// The definition should match the structure defined in
> +// compiler-rt/lib/profile/InstrProfiling.h.
> +// It should also match the synthesized type in
> +// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters.
> +
> +template <class IntPtrT>
> +struct ProfileData {
> + const uint32_t NameSize;
> + const uint32_t NumCounters;
> + const uint64_t FuncHash;
> + const IntPtrT NamePtr;
> + const IntPtrT CounterPtr;
> +};
> +
> +// The definition should match the header referenced in
> +// compiler-rt/lib/profile/InstrProfilingFile.c and
> +// InstrProfilingBuffer.c.
> +
> +struct Header {
> + const uint64_t Magic;
> + const uint64_t Version;
> + const uint64_t DataSize;
> + const uint64_t CountersSize;
> + const uint64_t NamesSize;
> + const uint64_t CountersDelta;
> + const uint64_t NamesDelta;
> +};
> +
> +} // end namespace RawInstrProf
> +
> } // end namespace llvm
>
> namespace std {
More information about the llvm-commits
mailing list