[PATCH] Value profiling - patchset 2 - merge intended

Fri Jun 19 16:11:08 PDT 2015

> Betul Buyukkurt <betulb at codeaurora.org> writes:
>> InstrProfRecord struct is moved into InstrProf.h
>> Raw and indexed readers are updated in preparation for version updates.
>> clang-format is ran on all the affected files.
>
> If you're going to clang-format the entire file (rather than just the
> parts you change) it's better for that to go in as a separate commit
> beforehand. It's hard to tell which parts are actual changes and which
> are just formatting this way.
>
> For formatting just the parts of the file that you've changed, there's a
> script in clang/tools/clang-format/git-clang-format that can be helpful.
>
> In any case, this looks basically fine to me, other than one comment
> below. Do you need me to commit this for you?

I'd appreciate if you do so. The latest patch addressed the naming
concern. I've also made sure clang-format related formatting changes are
not present in the latest patch.

Thanks,
-Betul

>>
>> http://reviews.llvm.org/D10493
>>
>> Files:
>>   include/llvm/ProfileData/InstrProf.h
>>   include/llvm/ProfileData/InstrProfReader.h
>>   lib/ProfileData/InstrProfReader.cpp
>>
>> EMAIL PREFERENCES
>>   http://reviews.llvm.org/settings/panel/emailpreferences/
>> Index: include/llvm/ProfileData/InstrProf.h
>> ===================================================================
>> --- include/llvm/ProfileData/InstrProf.h
>> +++ include/llvm/ProfileData/InstrProf.h
>> @@ -16,31 +16,44 @@
>>  #ifndef LLVM_PROFILEDATA_INSTRPROF_H_
>>  #define LLVM_PROFILEDATA_INSTRPROF_H_
>>
>> +#include "llvm/ADT/ArrayRef.h"
>> +#include "llvm/ADT/StringRef.h"
>>  #include <system_error>
>> +#include <vector>
>>
>>  namespace llvm {
>>  const std::error_category &instrprof_category();
>>
>>  enum class instrprof_error {
>> -    success = 0,
>> -    eof,
>> -    bad_magic,
>> -    bad_header,
>> -    unsupported_version,
>> -    unsupported_hash_type,
>> -    too_large,
>> -    truncated,
>> -    malformed,
>> -    unknown_function,
>> -    hash_mismatch,
>> -    count_mismatch,
>> -    counter_overflow
>> +  success = 0,
>> +  eof,
>> +  bad_magic,
>> +  bad_header,
>> +  unsupported_version,
>> +  unsupported_hash_type,
>> +  too_large,
>> +  truncated,
>> +  malformed,
>> +  unknown_function,
>> +  hash_mismatch,
>> +  count_mismatch,
>> +  counter_overflow
>>  };
>>
>>  inline std::error_code make_error_code(instrprof_error E) {
>>    return std::error_code(static_cast<int>(E), instrprof_category());
>>  }
>>
>> +/// Profiling information for a single function.
>> +struct InstrProfRecord {
>> +  InstrProfRecord() {}
>> +  InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t>
> Counts)
>> +      : Name(Name), Hash(Hash), Counts(Counts) {}
>> +  StringRef Name;
>> +  uint64_t Hash;
>> +  std::vector<uint64_t> Counts;
>> +};
>> +
>>  } // end namespace llvm
>>
>>  namespace std {
>> Index: include/llvm/ProfileData/InstrProfReader.h
>> ===================================================================
>> --- include/llvm/ProfileData/InstrProfReader.h
>> +++ include/llvm/ProfileData/InstrProfReader.h
>> @@ -15,7 +15,6 @@
>>  #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
>>  #define LLVM_PROFILEDATA_INSTRPROFREADER_H
>>
>> -#include "llvm/ADT/ArrayRef.h"
>>  #include "llvm/ADT/StringExtras.h"
>>  #include "llvm/ProfileData/InstrProf.h"
>>  #include "llvm/Support/EndianStream.h"
>> @@ -29,28 +28,22 @@
>>
>>  class InstrProfReader;
>>
>> -/// Profiling information for a single function.
>> -struct InstrProfRecord {
>> -  InstrProfRecord() {}
>> -  InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t>
> Counts)
>> -      : Name(Name), Hash(Hash), Counts(Counts) {}
>> -  StringRef Name;
>> -  uint64_t Hash;
>> -  ArrayRef<uint64_t> Counts;
>> -};
>> -
>>  /// A file format agnostic iterator over profiling data.
>> -class InstrProfIterator : public std::iterator<std::input_iterator_tag,
>> -                                               InstrProfRecord> {
>> +class InstrProfIterator
>> +    : public std::iterator<std::input_iterator_tag, InstrProfRecord> {
>>    InstrProfReader *Reader;
>>    InstrProfRecord Record;
>>
>>    void Increment();
>> +
>>  public:
>>    InstrProfIterator() : Reader(nullptr) {}
>>    InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) {
> Increment(); }
>>
>> -  InstrProfIterator &operator++() { Increment(); return *this; }
>> +  InstrProfIterator &operator++() {
>> +    Increment();
>> +    return *this;
>> +  }
>>    bool operator==(const InstrProfIterator &RHS) { return Reader ==
> RHS.Reader; }
>>    bool operator!=(const InstrProfIterator &RHS) { return Reader !=
> RHS.Reader; }
>>    InstrProfRecord &operator*() { return Record; }
>> @@ -114,11 +107,10 @@
>>    std::unique_ptr<MemoryBuffer> DataBuffer;
>>    /// Iterator over the profile data.
>>    line_iterator Line;
>> -  /// The current set of counter values.
>> -  std::vector<uint64_t> Counts;
>>
>>    TextInstrProfReader(const TextInstrProfReader &) = delete;
>>    TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
>> +
>>  public:
>>    TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
>>        : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true,
> '#') {}
>> @@ -136,13 +128,10 @@
>>  ///
>>  /// Templated on the unsigned type whose size matches pointers on the
> platform
>>  /// that wrote the profile.
>> -template <class IntPtrT>
>> -class RawInstrProfReader : public InstrProfReader {
>> +template <class IntPtrT> class RawInstrProfReader : public
> InstrProfReader {
>>  private:
>>    /// The profile data file contents.
>>    std::unique_ptr<MemoryBuffer> DataBuffer;
>> -  /// The current set of counter values.
>> -  std::vector<uint64_t> Counts;
>>    struct ProfileData {
>>      const uint32_t NameSize;
>>      const uint32_t NumCounters;
>> @@ -171,19 +160,19 @@
>>
>>    RawInstrProfReader(const RawInstrProfReader &) = delete;
>>    RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
>> +
>>  public:
>>    RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
>> -      : DataBuffer(std::move(DataBuffer)) { }
>> +      : DataBuffer(std::move(DataBuffer)) {}
>>
>>    static bool hasFormat(const MemoryBuffer &DataBuffer);
>>    std::error_code readHeader() override;
>>    std::error_code readNextRecord(InstrProfRecord &Record) override;
>>
>>  private:
>>    std::error_code readNextHeader(const char *CurrentPos);
>>    std::error_code readHeader(const RawHeader &Header);
>> -  template <class IntT>
>> -  IntT swap(IntT Int) const {
>> +  template <class IntT> IntT swap(IntT Int) const {
>>      return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
>>    }
>>    const uint64_t *getCounter(IntPtrT CounterPtr) const {
>> @@ -206,17 +195,16 @@
>>  /// Trait for lookups into the on-disk hash table for the binary
> instrprof
>>  /// format.
>>  class InstrProfLookupTrait {
>> -  std::vector<uint64_t> DataBuffer;
>> +  std::vector<InstrProfRecord> DataBuffer;
>>    IndexedInstrProf::HashT HashType;
>> +  unsigned FormatVersion;
>> +
>>  public:
>> -  InstrProfLookupTrait(IndexedInstrProf::HashT HashType) :
> HashType(HashType) {}
>> +  InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned
> FormatVersion)
>> +      : HashType(HashType), FormatVersion(FormatVersion) {}
>> +
>> +  typedef ArrayRef<InstrProfRecord> data_type;
>>
>> -  struct data_type {
>> -    data_type(StringRef Name, ArrayRef<uint64_t> Data)
>> -        : Name(Name), Data(Data) {}
>> -    StringRef Name;
>> -    ArrayRef<uint64_t> Data;
>> -  };
>>    typedef StringRef internal_key_type;
>>    typedef StringRef external_key_type;
>>    typedef uint64_t hash_value_type;
>> @@ -239,22 +227,18 @@
>>      return StringRef((const char *)D, N);
>>    }
>>
>> -  data_type ReadData(StringRef K, const unsigned char *D, offset_type
> N) {
>> -    DataBuffer.clear();
>> -    if (N % sizeof(uint64_t))
>> -      // The data is corrupt, don't try to read it.
>> -      return data_type("", DataBuffer);
>> +  data_type ReadDataV1V2(StringRef K, const unsigned char *D,
> offset_type N);
>>
>> -    using namespace support;
>> -    // We just treat the data as opaque here. It's simpler to handle in
>> -    // IndexedInstrProfReader.
>> -    unsigned NumEntries = N / sizeof(uint64_t);
>> -    DataBuffer.reserve(NumEntries);
>> -    for (unsigned I = 0; I < NumEntries; ++I)
>> -      DataBuffer.push_back(endian::readNext<uint64_t, little,
> unaligned>(D));
>> -    return data_type(K, DataBuffer);
>> +  data_type ReadData(StringRef K, const unsigned char *D, offset_type
> N) {
>> +    switch (FormatVersion) {
>> +    case 1:
>> +    case 2:
>> +      return ReadDataV1V2(K, D, N);
>> +    }
>> +    return data_type();
>
> Let's not split this into ReadData and ReadDataV1V2 in this commit. Just
> forward declare ReadData here and rename the ReadDataV1V2 you have in
> the cpp file to ReadData for now.
>
>>    }
>>  };
>> +
>>  typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
>>      InstrProfReaderIndex;
>>
>> @@ -267,18 +251,17 @@
>>    std::unique_ptr<InstrProfReaderIndex> Index;
>>    /// Iterator over the profile data.
>>    InstrProfReaderIndex::data_iterator RecordIterator;
>> -  /// Offset into our current data set.
>> -  size_t CurrentOffset;
>>    /// The file format version of the profile data.
>>    uint64_t FormatVersion;
>>    /// The maximal execution count among all functions.
>>    uint64_t MaxFunctionCount;
>>
>>    IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
>>    IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) =
> delete;
>> +
>>  public:
>>    IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
>> -      : DataBuffer(std::move(DataBuffer)), Index(nullptr),
> CurrentOffset(0) {}
>> +      : DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
>>
>>    /// Return true if the given buffer is in an indexed instrprof
> format.
>>    static bool hasFormat(const MemoryBuffer &DataBuffer);
>> Index: lib/ProfileData/InstrProfReader.cpp
>> ===================================================================
>> --- lib/ProfileData/InstrProfReader.cpp
>> +++ lib/ProfileData/InstrProfReader.cpp
>> @@ -15,7 +15,6 @@
>>  #include "llvm/ProfileData/InstrProfReader.h"
>>  #include "InstrProfIndexed.h"
>>  #include "llvm/ADT/STLExtras.h"
>> -#include "llvm/ProfileData/InstrProf.h"
>>  #include <cassert>
>>
>>  using namespace llvm;
>> @@ -75,7 +74,6 @@
>>    return
> IndexedInstrProfReader::create(std::move(BufferOrError.get()));
>>  }
>>
>> -
>>  ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
>>  IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
>>    // Sanity check the buffer.
>> @@ -126,59 +124,42 @@
>>      return error(instrprof_error::malformed);
>>
>>    // Read each counter and fill our internal storage with the values.
>> -  Counts.clear();
>> -  Counts.reserve(NumCounters);
>> +  Record.Counts.clear();
>> +  Record.Counts.reserve(NumCounters);
>>    for (uint64_t I = 0; I < NumCounters; ++I) {
>>      if (Line.is_at_end())
>>        return error(instrprof_error::truncated);
>>      uint64_t Count;
>>      if ((Line++)->getAsInteger(10, Count))
>>        return error(instrprof_error::malformed);
>> -    Counts.push_back(Count);
>> +    Record.Counts.push_back(Count);
>>    }
>> -  // Give the record a reference to our internal counter storage.
>> -  Record.Counts = Counts;
>>
>>    return success();
>>  }
>>
>> -template <class IntPtrT>
>> -static uint64_t getRawMagic();
>> -
>> -template <>
>> -uint64_t getRawMagic<uint64_t>() {
>> -  return
>> -    uint64_t(255) << 56 |
>> -    uint64_t('l') << 48 |
>> -    uint64_t('p') << 40 |
>> -    uint64_t('r') << 32 |
>> -    uint64_t('o') << 24 |
>> -    uint64_t('f') << 16 |
>> -    uint64_t('r') <<  8 |
>> -    uint64_t(129);
>> +template <class IntPtrT> static uint64_t getRawMagic();
>> +
>> +template <> uint64_t getRawMagic<uint64_t>() {
>> +  return uint64_t(255) << 56 | uint64_t('l') << 48 | uint64_t('p') <<
> 40 |
>> +         uint64_t('r') << 32 | uint64_t('o') << 24 | uint64_t('f') <<
> 16 |
>> +         uint64_t('r') << 8 | uint64_t(129);
>>  }
>>
>> -template <>
>> -uint64_t getRawMagic<uint32_t>() {
>> -  return
>> -    uint64_t(255) << 56 |
>> -    uint64_t('l') << 48 |
>> -    uint64_t('p') << 40 |
>> -    uint64_t('r') << 32 |
>> -    uint64_t('o') << 24 |
>> -    uint64_t('f') << 16 |
>> -    uint64_t('R') <<  8 |
>> -    uint64_t(129);
>> +template <> uint64_t getRawMagic<uint32_t>() {
>> +  return uint64_t(255) << 56 | uint64_t('l') << 48 | uint64_t('p') <<
> 40 |
>> +         uint64_t('r') << 32 | uint64_t('o') << 24 | uint64_t('f') <<
> 16 |
>> +         uint64_t('R') << 8 | uint64_t(129);
>>  }
>>
>>  template <class IntPtrT>
>>  bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer
> &DataBuffer) {
>>    if (DataBuffer.getBufferSize() < sizeof(uint64_t))
>>      return false;
>>    uint64_t Magic =
>> -    *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
>> +      *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
>>    return getRawMagic<IntPtrT>() == Magic ||
>> -    sys::getSwappedBytes(getRawMagic<IntPtrT>()) == Magic;
>> +         sys::getSwappedBytes(getRawMagic<IntPtrT>()) == Magic;
>>  }
>>
>>  template <class IntPtrT>
>> @@ -188,7 +169,7 @@
>>    if (DataBuffer->getBufferSize() < sizeof(RawHeader))
>>      return error(instrprof_error::bad_header);
>>    auto *Header =
>> -    reinterpret_cast<const RawHeader *>(DataBuffer->getBufferStart());
>> +      reinterpret_cast<const RawHeader
> *>(DataBuffer->getBufferStart());
>>    ShouldSwapBytes = Header->Magic != getRawMagic<IntPtrT>();
>>    return readHeader(*Header);
>>  }
>> @@ -220,9 +201,7 @@
>>    return readHeader(*Header);
>>  }
>>
>> -static uint64_t getRawVersion() {
>> -  return 1;
>> -}
>> +static uint64_t getRawVersion() { return 1; }
>>
>>  template <class IntPtrT>
>>  std::error_code
>> @@ -280,11 +259,10 @@
>>    Record.Hash = swap(Data->FuncHash);
>>    Record.Name = RawName;
>>    if (ShouldSwapBytes) {
>> -    Counts.clear();
>> -    Counts.reserve(RawCounts.size());
>> +    Record.Counts.clear();
>> +    Record.Counts.reserve(RawCounts.size());
>>      for (uint64_t Count : RawCounts)
>> -      Counts.push_back(swap(Count));
>> -    Record.Counts = Counts;
>> +      Record.Counts.push_back(swap(Count));
>>    } else
>>      Record.Counts = RawCounts;
>>
>> @@ -303,6 +281,50 @@
>>    return IndexedInstrProf::ComputeHash(HashType, K);
>>  }
>>
>> +typedef InstrProfLookupTrait::data_type data_type;
>> +typedef InstrProfLookupTrait::offset_type offset_type;
>> +
>> +data_type InstrProfLookupTrait::ReadDataV1V2(StringRef K,
>> +                                             const unsigned char *D,
>> +                                             offset_type N) {
>> +
>> +  // Check if the data is corrupt. If so, don't try to read it.
>> +  if (N % sizeof(uint64_t))
>> +    return data_type();
>> +
>> +  DataBuffer.clear();
>> +  uint64_t NumCounts;
>> +  uint64_t NumEntries = N / sizeof(uint64_t);
>> +  std::vector<uint64_t> CounterBuffer;
>> +  for (uint64_t I = 0; I < NumEntries; I += NumCounts) {
>> +    using namespace support;
>> +    // The function hash comes first.
>> +    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
>> +
>> +    if (++I >= NumEntries)
>> +      return data_type();
>> +
>> +    // In v1, we have at least one count.
>> +    // Later, we have the number of counts.
>> +    NumCounts = (1 == FormatVersion)
>> +                    ? NumEntries - I
>> +                    : endian::readNext<uint64_t, little, unaligned>(D);
>> +    if (1 != FormatVersion)
>> +      ++I;
>> +
>> +    // If we have more counts than data, this is bogus.
>> +    if (I + NumCounts > NumEntries)
>> +      return data_type();
>> +
>> +    CounterBuffer.clear();
>> +    for (unsigned J = 0; J < NumCounts; ++J)
>> +      CounterBuffer.push_back(endian::readNext<uint64_t, little,
> unaligned>(D));
>> +
>> +    DataBuffer.push_back(InstrProfRecord(K, Hash, CounterBuffer));
>> +  }
>> +  return DataBuffer;
>> +}
>> +
>>  bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer)
> {
>>    if (DataBuffer.getBufferSize() < 8)
>>      return false;
>> @@ -342,36 +364,32 @@
>>    uint64_t HashOffset = endian::readNext<uint64_t, little,
> unaligned>(Cur);
>>
>>    // The rest of the file is an on disk hash table.
>> -  Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur,
> Start,
>> -
> InstrProfLookupTrait(HashType)));
>> +  Index.reset(InstrProfReaderIndex::Create(
>> +      Start + HashOffset, Cur, Start,
>> +      InstrProfLookupTrait(HashType, FormatVersion)));
>>    // Set up our iterator for readNextRecord.
>>    RecordIterator = Index->data_begin();
>>
>>    return success();
>>  }
>>
>> -std::error_code IndexedInstrProfReader::getFunctionCounts(
>> -    StringRef FuncName, uint64_t FuncHash, std::vector<uint64_t>
> &Counts) {
>> +std::error_code
>> +IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, uint64_t
> FuncHash,
>> +                                          std::vector<uint64_t>
> &Counts) {
>> +
>>    auto Iter = Index->find(FuncName);
>>    if (Iter == Index->end())
>>      return error(instrprof_error::unknown_function);
>>
>>    // Found it. Look for counters with the right hash.
>> -  ArrayRef<uint64_t> Data = (*Iter).Data;
>> -  uint64_t NumCounts;
>> -  for (uint64_t I = 0, E = Data.size(); I != E; I += NumCounts) {
>> -    // The function hash comes first.
>> -    uint64_t FoundHash = Data[I++];
>> -    // In v1, we have at least one count. Later, we have the number of
> counts.
>> -    if (I == E)
>> -      return error(instrprof_error::malformed);
>> -    NumCounts = FormatVersion == 1 ? E - I : Data[I++];
>> -    // If we have more counts than data, this is bogus.
>> -    if (I + NumCounts > E)
>> -      return error(instrprof_error::malformed);
>> +  ArrayRef<InstrProfRecord> Data = (*Iter);
>> +  if (Data.empty())
>> +    return error(instrprof_error::malformed);
>> +
>> +  for (unsigned I = 0, E = Data.size(); I < E; ++I) {
>>      // Check for a match and fill the vector if there is one.
>> -    if (FoundHash == FuncHash) {
>> -      Counts = Data.slice(I, NumCounts);
>> +    if (Data[I].Hash == FuncHash) {
>> +      Counts = Data[I].Counts;
>>        return success();
>>      }
>>    }
>> @@ -384,30 +402,15 @@
>>    if (RecordIterator == Index->data_end())
>>      return error(instrprof_error::eof);
>>
>> -  // Record the current function name.
>> -  Record.Name = (*RecordIterator).Name;
>> -
>> -  ArrayRef<uint64_t> Data = (*RecordIterator).Data;
>> -  // Valid data starts with a hash and either a count or the number of
> counts.
>> -  if (CurrentOffset + 1 > Data.size())
>> -    return error(instrprof_error::malformed);
>> -  // First we have a function hash.
>> -  Record.Hash = Data[CurrentOffset++];
>> -  // In version 1 we knew the number of counters implicitly, but in
> newer
>> -  // versions we store the number of counters next.
>> -  uint64_t NumCounts =
>> -      FormatVersion == 1 ? Data.size() - CurrentOffset :
> Data[CurrentOffset++];
>> -  if (CurrentOffset + NumCounts > Data.size())
>> +  if ((*RecordIterator).empty())
>>      return error(instrprof_error::malformed);
>> -  // And finally the counts themselves.
>> -  Record.Counts = Data.slice(CurrentOffset, NumCounts);
>>
>> -  // If we've exhausted this function's data, increment the record.
>> -  CurrentOffset += NumCounts;
>> -  if (CurrentOffset == Data.size()) {
>> +  static unsigned RecordIndex = 0;
>> +  ArrayRef<InstrProfRecord> Data = (*RecordIterator);
>> +  Record = Data[RecordIndex++];
>> +  if (RecordIndex >= Data.size()) {
>>      ++RecordIterator;
>> -    CurrentOffset = 0;
>> +    RecordIndex = 0;
>>    }
>> -
>>    return success();
>>  }
>