[PATCH] Value profiling - patchset 2 - merge intended

Justin Bogner mail at justinbogner.com
Thu Jun 18 13:50:27 PDT 2015


Betul Buyukkurt <betulb at codeaurora.org> writes:
> InstrProfRecord struct is moved into InstrProf.h
> Raw and indexed readers are updated in preparation for version updates.
> clang-format is ran on all the affected files.

If you're going to clang-format the entire file (rather than just the
parts you change) it's better for that to go in as a separate commit
beforehand. It's hard to tell which parts are actual changes and which
are just formatting this way.

For formatting just the parts of the file that you've changed, there's a
script in clang/tools/clang-format/git-clang-format that can be helpful.

In any case, this looks basically fine to me, other than one comment
below. Do you need me to commit this for you?

>
> http://reviews.llvm.org/D10493
>
> Files:
>   include/llvm/ProfileData/InstrProf.h
>   include/llvm/ProfileData/InstrProfReader.h
>   lib/ProfileData/InstrProfReader.cpp
>
> EMAIL PREFERENCES
>   http://reviews.llvm.org/settings/panel/emailpreferences/
> Index: include/llvm/ProfileData/InstrProf.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProf.h
> +++ include/llvm/ProfileData/InstrProf.h
> @@ -16,31 +16,44 @@
>  #ifndef LLVM_PROFILEDATA_INSTRPROF_H_
>  #define LLVM_PROFILEDATA_INSTRPROF_H_
>  
> +#include "llvm/ADT/ArrayRef.h"
> +#include "llvm/ADT/StringRef.h"
>  #include <system_error>
> +#include <vector>
>  
>  namespace llvm {
>  const std::error_category &instrprof_category();
>  
>  enum class instrprof_error {
> -    success = 0,
> -    eof,
> -    bad_magic,
> -    bad_header,
> -    unsupported_version,
> -    unsupported_hash_type,
> -    too_large,
> -    truncated,
> -    malformed,
> -    unknown_function,
> -    hash_mismatch,
> -    count_mismatch,
> -    counter_overflow
> +  success = 0,
> +  eof,
> +  bad_magic,
> +  bad_header,
> +  unsupported_version,
> +  unsupported_hash_type,
> +  too_large,
> +  truncated,
> +  malformed,
> +  unknown_function,
> +  hash_mismatch,
> +  count_mismatch,
> +  counter_overflow
>  };
>  
>  inline std::error_code make_error_code(instrprof_error E) {
>    return std::error_code(static_cast<int>(E), instrprof_category());
>  }
>  
> +/// Profiling information for a single function.
> +struct InstrProfRecord {
> +  InstrProfRecord() {}
> +  InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t> Counts)
> +      : Name(Name), Hash(Hash), Counts(Counts) {}
> +  StringRef Name;
> +  uint64_t Hash;
> +  std::vector<uint64_t> Counts;
> +};
> +
>  } // end namespace llvm
>  
>  namespace std {
> Index: include/llvm/ProfileData/InstrProfReader.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProfReader.h
> +++ include/llvm/ProfileData/InstrProfReader.h
> @@ -15,7 +15,6 @@
>  #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
>  #define LLVM_PROFILEDATA_INSTRPROFREADER_H
>  
> -#include "llvm/ADT/ArrayRef.h"
>  #include "llvm/ADT/StringExtras.h"
>  #include "llvm/ProfileData/InstrProf.h"
>  #include "llvm/Support/EndianStream.h"
> @@ -29,28 +28,22 @@
>  
>  class InstrProfReader;
>  
> -/// Profiling information for a single function.
> -struct InstrProfRecord {
> -  InstrProfRecord() {}
> -  InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t> Counts)
> -      : Name(Name), Hash(Hash), Counts(Counts) {}
> -  StringRef Name;
> -  uint64_t Hash;
> -  ArrayRef<uint64_t> Counts;
> -};
> -
>  /// A file format agnostic iterator over profiling data.
> -class InstrProfIterator : public std::iterator<std::input_iterator_tag,
> -                                               InstrProfRecord> {
> +class InstrProfIterator
> +    : public std::iterator<std::input_iterator_tag, InstrProfRecord> {
>    InstrProfReader *Reader;
>    InstrProfRecord Record;
>  
>    void Increment();
> +
>  public:
>    InstrProfIterator() : Reader(nullptr) {}
>    InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
>  
> -  InstrProfIterator &operator++() { Increment(); return *this; }
> +  InstrProfIterator &operator++() {
> +    Increment();
> +    return *this;
> +  }
>    bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
>    bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
>    InstrProfRecord &operator*() { return Record; }
> @@ -114,11 +107,10 @@
>    std::unique_ptr<MemoryBuffer> DataBuffer;
>    /// Iterator over the profile data.
>    line_iterator Line;
> -  /// The current set of counter values.
> -  std::vector<uint64_t> Counts;
>  
>    TextInstrProfReader(const TextInstrProfReader &) = delete;
>    TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
> +
>  public:
>    TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
>        : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
> @@ -136,13 +128,10 @@
>  ///
>  /// Templated on the unsigned type whose size matches pointers on the platform
>  /// that wrote the profile.
> -template <class IntPtrT>
> -class RawInstrProfReader : public InstrProfReader {
> +template <class IntPtrT> class RawInstrProfReader : public InstrProfReader {
>  private:
>    /// The profile data file contents.
>    std::unique_ptr<MemoryBuffer> DataBuffer;
> -  /// The current set of counter values.
> -  std::vector<uint64_t> Counts;
>    struct ProfileData {
>      const uint32_t NameSize;
>      const uint32_t NumCounters;
> @@ -171,19 +160,19 @@
>  
>    RawInstrProfReader(const RawInstrProfReader &) = delete;
>    RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
> +
>  public:
>    RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
> -      : DataBuffer(std::move(DataBuffer)) { }
> +      : DataBuffer(std::move(DataBuffer)) {}
>  
>    static bool hasFormat(const MemoryBuffer &DataBuffer);
>    std::error_code readHeader() override;
>    std::error_code readNextRecord(InstrProfRecord &Record) override;
>  
>  private:
>    std::error_code readNextHeader(const char *CurrentPos);
>    std::error_code readHeader(const RawHeader &Header);
> -  template <class IntT>
> -  IntT swap(IntT Int) const {
> +  template <class IntT> IntT swap(IntT Int) const {
>      return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
>    }
>    const uint64_t *getCounter(IntPtrT CounterPtr) const {
> @@ -206,17 +195,16 @@
>  /// Trait for lookups into the on-disk hash table for the binary instrprof
>  /// format.
>  class InstrProfLookupTrait {
> -  std::vector<uint64_t> DataBuffer;
> +  std::vector<InstrProfRecord> DataBuffer;
>    IndexedInstrProf::HashT HashType;
> +  unsigned FormatVersion;
> +
>  public:
> -  InstrProfLookupTrait(IndexedInstrProf::HashT HashType) : HashType(HashType) {}
> +  InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
> +      : HashType(HashType), FormatVersion(FormatVersion) {}
> +
> +  typedef ArrayRef<InstrProfRecord> data_type;
>  
> -  struct data_type {
> -    data_type(StringRef Name, ArrayRef<uint64_t> Data)
> -        : Name(Name), Data(Data) {}
> -    StringRef Name;
> -    ArrayRef<uint64_t> Data;
> -  };
>    typedef StringRef internal_key_type;
>    typedef StringRef external_key_type;
>    typedef uint64_t hash_value_type;
> @@ -239,22 +227,18 @@
>      return StringRef((const char *)D, N);
>    }
>  
> -  data_type ReadData(StringRef K, const unsigned char *D, offset_type N) {
> -    DataBuffer.clear();
> -    if (N % sizeof(uint64_t))
> -      // The data is corrupt, don't try to read it.
> -      return data_type("", DataBuffer);
> +  data_type ReadDataV1V2(StringRef K, const unsigned char *D, offset_type N);
>  
> -    using namespace support;
> -    // We just treat the data as opaque here. It's simpler to handle in
> -    // IndexedInstrProfReader.
> -    unsigned NumEntries = N / sizeof(uint64_t);
> -    DataBuffer.reserve(NumEntries);
> -    for (unsigned I = 0; I < NumEntries; ++I)
> -      DataBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
> -    return data_type(K, DataBuffer);
> +  data_type ReadData(StringRef K, const unsigned char *D, offset_type N) {
> +    switch (FormatVersion) {
> +    case 1:
> +    case 2:
> +      return ReadDataV1V2(K, D, N);
> +    }
> +    return data_type();

Let's not split this into ReadData and ReadDataV1V2 in this commit. Just
forward declare ReadData here and rename the ReadDataV1V2 you have in
the cpp file to ReadData for now.

>    }
>  };
> +
>  typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
>      InstrProfReaderIndex;
>  
> @@ -267,18 +251,17 @@
>    std::unique_ptr<InstrProfReaderIndex> Index;
>    /// Iterator over the profile data.
>    InstrProfReaderIndex::data_iterator RecordIterator;
> -  /// Offset into our current data set.
> -  size_t CurrentOffset;
>    /// The file format version of the profile data.
>    uint64_t FormatVersion;
>    /// The maximal execution count among all functions.
>    uint64_t MaxFunctionCount;
>  
>    IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
>    IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
> +
>  public:
>    IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
> -      : DataBuffer(std::move(DataBuffer)), Index(nullptr), CurrentOffset(0) {}
> +      : DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
>  
>    /// Return true if the given buffer is in an indexed instrprof format.
>    static bool hasFormat(const MemoryBuffer &DataBuffer);
> Index: lib/ProfileData/InstrProfReader.cpp
> ===================================================================
> --- lib/ProfileData/InstrProfReader.cpp
> +++ lib/ProfileData/InstrProfReader.cpp
> @@ -15,7 +15,6 @@
>  #include "llvm/ProfileData/InstrProfReader.h"
>  #include "InstrProfIndexed.h"
>  #include "llvm/ADT/STLExtras.h"
> -#include "llvm/ProfileData/InstrProf.h"
>  #include <cassert>
>  
>  using namespace llvm;
> @@ -75,7 +74,6 @@
>    return IndexedInstrProfReader::create(std::move(BufferOrError.get()));
>  }
>  
> -
>  ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
>  IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
>    // Sanity check the buffer.
> @@ -126,59 +124,42 @@
>      return error(instrprof_error::malformed);
>  
>    // Read each counter and fill our internal storage with the values.
> -  Counts.clear();
> -  Counts.reserve(NumCounters);
> +  Record.Counts.clear();
> +  Record.Counts.reserve(NumCounters);
>    for (uint64_t I = 0; I < NumCounters; ++I) {
>      if (Line.is_at_end())
>        return error(instrprof_error::truncated);
>      uint64_t Count;
>      if ((Line++)->getAsInteger(10, Count))
>        return error(instrprof_error::malformed);
> -    Counts.push_back(Count);
> +    Record.Counts.push_back(Count);
>    }
> -  // Give the record a reference to our internal counter storage.
> -  Record.Counts = Counts;
>  
>    return success();
>  }
>  
> -template <class IntPtrT>
> -static uint64_t getRawMagic();
> -
> -template <>
> -uint64_t getRawMagic<uint64_t>() {
> -  return
> -    uint64_t(255) << 56 |
> -    uint64_t('l') << 48 |
> -    uint64_t('p') << 40 |
> -    uint64_t('r') << 32 |
> -    uint64_t('o') << 24 |
> -    uint64_t('f') << 16 |
> -    uint64_t('r') <<  8 |
> -    uint64_t(129);
> +template <class IntPtrT> static uint64_t getRawMagic();
> +
> +template <> uint64_t getRawMagic<uint64_t>() {
> +  return uint64_t(255) << 56 | uint64_t('l') << 48 | uint64_t('p') << 40 |
> +         uint64_t('r') << 32 | uint64_t('o') << 24 | uint64_t('f') << 16 |
> +         uint64_t('r') << 8 | uint64_t(129);
>  }
>  
> -template <>
> -uint64_t getRawMagic<uint32_t>() {
> -  return
> -    uint64_t(255) << 56 |
> -    uint64_t('l') << 48 |
> -    uint64_t('p') << 40 |
> -    uint64_t('r') << 32 |
> -    uint64_t('o') << 24 |
> -    uint64_t('f') << 16 |
> -    uint64_t('R') <<  8 |
> -    uint64_t(129);
> +template <> uint64_t getRawMagic<uint32_t>() {
> +  return uint64_t(255) << 56 | uint64_t('l') << 48 | uint64_t('p') << 40 |
> +         uint64_t('r') << 32 | uint64_t('o') << 24 | uint64_t('f') << 16 |
> +         uint64_t('R') << 8 | uint64_t(129);
>  }
>  
>  template <class IntPtrT>
>  bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
>    if (DataBuffer.getBufferSize() < sizeof(uint64_t))
>      return false;
>    uint64_t Magic =
> -    *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
> +      *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
>    return getRawMagic<IntPtrT>() == Magic ||
> -    sys::getSwappedBytes(getRawMagic<IntPtrT>()) == Magic;
> +         sys::getSwappedBytes(getRawMagic<IntPtrT>()) == Magic;
>  }
>  
>  template <class IntPtrT>
> @@ -188,7 +169,7 @@
>    if (DataBuffer->getBufferSize() < sizeof(RawHeader))
>      return error(instrprof_error::bad_header);
>    auto *Header =
> -    reinterpret_cast<const RawHeader *>(DataBuffer->getBufferStart());
> +      reinterpret_cast<const RawHeader *>(DataBuffer->getBufferStart());
>    ShouldSwapBytes = Header->Magic != getRawMagic<IntPtrT>();
>    return readHeader(*Header);
>  }
> @@ -220,9 +201,7 @@
>    return readHeader(*Header);
>  }
>  
> -static uint64_t getRawVersion() {
> -  return 1;
> -}
> +static uint64_t getRawVersion() { return 1; }
>  
>  template <class IntPtrT>
>  std::error_code
> @@ -280,11 +259,10 @@
>    Record.Hash = swap(Data->FuncHash);
>    Record.Name = RawName;
>    if (ShouldSwapBytes) {
> -    Counts.clear();
> -    Counts.reserve(RawCounts.size());
> +    Record.Counts.clear();
> +    Record.Counts.reserve(RawCounts.size());
>      for (uint64_t Count : RawCounts)
> -      Counts.push_back(swap(Count));
> -    Record.Counts = Counts;
> +      Record.Counts.push_back(swap(Count));
>    } else
>      Record.Counts = RawCounts;
>  
> @@ -303,6 +281,50 @@
>    return IndexedInstrProf::ComputeHash(HashType, K);
>  }
>  
> +typedef InstrProfLookupTrait::data_type data_type;
> +typedef InstrProfLookupTrait::offset_type offset_type;
> +
> +data_type InstrProfLookupTrait::ReadDataV1V2(StringRef K,
> +                                             const unsigned char *D,
> +                                             offset_type N) {
> +
> +  // Check if the data is corrupt. If so, don't try to read it.
> +  if (N % sizeof(uint64_t))
> +    return data_type();
> +
> +  DataBuffer.clear();
> +  uint64_t NumCounts;
> +  uint64_t NumEntries = N / sizeof(uint64_t);
> +  std::vector<uint64_t> CounterBuffer;
> +  for (uint64_t I = 0; I < NumEntries; I += NumCounts) {
> +    using namespace support;
> +    // The function hash comes first.
> +    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
> +
> +    if (++I >= NumEntries)
> +      return data_type();
> +
> +    // In v1, we have at least one count.
> +    // Later, we have the number of counts.
> +    NumCounts = (1 == FormatVersion)
> +                    ? NumEntries - I
> +                    : endian::readNext<uint64_t, little, unaligned>(D);
> +    if (1 != FormatVersion)
> +      ++I;
> +
> +    // If we have more counts than data, this is bogus.
> +    if (I + NumCounts > NumEntries)
> +      return data_type();
> +
> +    CounterBuffer.clear();
> +    for (unsigned J = 0; J < NumCounts; ++J)
> +      CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
> +
> +    DataBuffer.push_back(InstrProfRecord(K, Hash, CounterBuffer));
> +  }
> +  return DataBuffer;
> +}
> +
>  bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
>    if (DataBuffer.getBufferSize() < 8)
>      return false;
> @@ -342,36 +364,32 @@
>    uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
>  
>    // The rest of the file is an on disk hash table.
> -  Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur, Start,
> -                                           InstrProfLookupTrait(HashType)));
> +  Index.reset(InstrProfReaderIndex::Create(
> +      Start + HashOffset, Cur, Start,
> +      InstrProfLookupTrait(HashType, FormatVersion)));
>    // Set up our iterator for readNextRecord.
>    RecordIterator = Index->data_begin();
>  
>    return success();
>  }
>  
> -std::error_code IndexedInstrProfReader::getFunctionCounts(
> -    StringRef FuncName, uint64_t FuncHash, std::vector<uint64_t> &Counts) {
> +std::error_code
> +IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
> +                                          std::vector<uint64_t> &Counts) {
> +
>    auto Iter = Index->find(FuncName);
>    if (Iter == Index->end())
>      return error(instrprof_error::unknown_function);
>  
>    // Found it. Look for counters with the right hash.
> -  ArrayRef<uint64_t> Data = (*Iter).Data;
> -  uint64_t NumCounts;
> -  for (uint64_t I = 0, E = Data.size(); I != E; I += NumCounts) {
> -    // The function hash comes first.
> -    uint64_t FoundHash = Data[I++];
> -    // In v1, we have at least one count. Later, we have the number of counts.
> -    if (I == E)
> -      return error(instrprof_error::malformed);
> -    NumCounts = FormatVersion == 1 ? E - I : Data[I++];
> -    // If we have more counts than data, this is bogus.
> -    if (I + NumCounts > E)
> -      return error(instrprof_error::malformed);
> +  ArrayRef<InstrProfRecord> Data = (*Iter);
> +  if (Data.empty())
> +    return error(instrprof_error::malformed);
> +
> +  for (unsigned I = 0, E = Data.size(); I < E; ++I) {
>      // Check for a match and fill the vector if there is one.
> -    if (FoundHash == FuncHash) {
> -      Counts = Data.slice(I, NumCounts);
> +    if (Data[I].Hash == FuncHash) {
> +      Counts = Data[I].Counts;
>        return success();
>      }
>    }
> @@ -384,30 +402,15 @@
>    if (RecordIterator == Index->data_end())
>      return error(instrprof_error::eof);
>  
> -  // Record the current function name.
> -  Record.Name = (*RecordIterator).Name;
> -
> -  ArrayRef<uint64_t> Data = (*RecordIterator).Data;
> -  // Valid data starts with a hash and either a count or the number of counts.
> -  if (CurrentOffset + 1 > Data.size())
> -    return error(instrprof_error::malformed);
> -  // First we have a function hash.
> -  Record.Hash = Data[CurrentOffset++];
> -  // In version 1 we knew the number of counters implicitly, but in newer
> -  // versions we store the number of counters next.
> -  uint64_t NumCounts =
> -      FormatVersion == 1 ? Data.size() - CurrentOffset : Data[CurrentOffset++];
> -  if (CurrentOffset + NumCounts > Data.size())
> +  if ((*RecordIterator).empty())
>      return error(instrprof_error::malformed);
> -  // And finally the counts themselves.
> -  Record.Counts = Data.slice(CurrentOffset, NumCounts);
>  
> -  // If we've exhausted this function's data, increment the record.
> -  CurrentOffset += NumCounts;
> -  if (CurrentOffset == Data.size()) {
> +  static unsigned RecordIndex = 0;
> +  ArrayRef<InstrProfRecord> Data = (*RecordIterator);
> +  Record = Data[RecordIndex++];
> +  if (RecordIndex >= Data.size()) {
>      ++RecordIterator;
> -    CurrentOffset = 0;
> +    RecordIndex = 0;
>    }
> -
>    return success();
>  }



More information about the llvm-commits mailing list