[PATCH] Value profiling - patchset 2 - merge intended

Sat Jun 20 15:15:10 PDT 2015

Betul Buyukkurt <betulb at codeaurora.org> writes:
> Included <cstdint> to InstrProf.h
>
>
> http://reviews.llvm.org/D10493
>
> Files:
>   include/llvm/ProfileData/InstrProf.h
>   include/llvm/ProfileData/InstrProfReader.h
>   lib/ProfileData/InstrProfReader.cpp
>
> EMAIL PREFERENCES
>   http://reviews.llvm.org/settings/panel/emailpreferences/
> Index: include/llvm/ProfileData/InstrProf.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProf.h
> +++ include/llvm/ProfileData/InstrProf.h
> @@ -16,7 +16,10 @@
>  #ifndef LLVM_PROFILEDATA_INSTRPROF_H_
>  #define LLVM_PROFILEDATA_INSTRPROF_H_
>  
> +#include "llvm/ADT/StringRef.h"
> +#include <cstdint>
>  #include <system_error>
> +#include <vector>
>  
>  namespace llvm {
>  const std::error_category &instrprof_category();
> @@ -41,6 +44,16 @@
>    return std::error_code(static_cast<int>(E), instrprof_category());
>  }
>  
> +/// Profiling information for a single function.
> +struct InstrProfRecord {
> +  InstrProfRecord() {}
> +  InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t> &Counts)

Please use an rvalue reference for Counts.

> +      : Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
> +  StringRef Name;
> +  uint64_t Hash;
> +  std::vector<uint64_t> Counts;
> +};
> +
>  } // end namespace llvm
>  
>  namespace std {
> Index: include/llvm/ProfileData/InstrProfReader.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProfReader.h
> +++ include/llvm/ProfileData/InstrProfReader.h
> @@ -29,16 +29,6 @@
>  
>  class InstrProfReader;
>  
> -/// Profiling information for a single function.
> -struct InstrProfRecord {
> -  InstrProfRecord() {}
> -  InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t> Counts)
> -      : Name(Name), Hash(Hash), Counts(Counts) {}
> -  StringRef Name;
> -  uint64_t Hash;
> -  ArrayRef<uint64_t> Counts;
> -};
> -
>  /// A file format agnostic iterator over profiling data.
>  class InstrProfIterator : public std::iterator<std::input_iterator_tag,
>                                                 InstrProfRecord> {
> @@ -114,8 +104,6 @@
>    std::unique_ptr<MemoryBuffer> DataBuffer;
>    /// Iterator over the profile data.
>    line_iterator Line;
> -  /// The current set of counter values.
> -  std::vector<uint64_t> Counts;
>  
>    TextInstrProfReader(const TextInstrProfReader &) = delete;
>    TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
> @@ -141,8 +129,6 @@
>  private:
>    /// The profile data file contents.
>    std::unique_ptr<MemoryBuffer> DataBuffer;
> -  /// The current set of counter values.
> -  std::vector<uint64_t> Counts;
>    struct ProfileData {
>      const uint32_t NameSize;
>      const uint32_t NumCounters;
> @@ -206,17 +192,16 @@
>  /// Trait for lookups into the on-disk hash table for the binary instrprof
>  /// format.
>  class InstrProfLookupTrait {
> -  std::vector<uint64_t> DataBuffer;
> +  std::vector<InstrProfRecord> DataBuffer;
>    IndexedInstrProf::HashT HashType;
> +  unsigned FormatVersion;
> +
>  public:
> -  InstrProfLookupTrait(IndexedInstrProf::HashT HashType) : HashType(HashType) {}
> +  InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
> +      : HashType(HashType), FormatVersion(FormatVersion) {}
> +
> +  typedef ArrayRef<InstrProfRecord> data_type;
>  
> -  struct data_type {
> -    data_type(StringRef Name, ArrayRef<uint64_t> Data)
> -        : Name(Name), Data(Data) {}
> -    StringRef Name;
> -    ArrayRef<uint64_t> Data;
> -  };
>    typedef StringRef internal_key_type;
>    typedef StringRef external_key_type;
>    typedef uint64_t hash_value_type;
> @@ -239,22 +224,9 @@
>      return StringRef((const char *)D, N);
>    }
>  
> -  data_type ReadData(StringRef K, const unsigned char *D, offset_type N) {
> -    DataBuffer.clear();
> -    if (N % sizeof(uint64_t))
> -      // The data is corrupt, don't try to read it.
> -      return data_type("", DataBuffer);
> -
> -    using namespace support;
> -    // We just treat the data as opaque here. It's simpler to handle in
> -    // IndexedInstrProfReader.
> -    unsigned NumEntries = N / sizeof(uint64_t);
> -    DataBuffer.reserve(NumEntries);
> -    for (unsigned I = 0; I < NumEntries; ++I)
> -      DataBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
> -    return data_type(K, DataBuffer);
> -  }
> +  data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
>  };
> +
>  typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
>      InstrProfReaderIndex;
>  
> @@ -267,8 +239,6 @@
>    std::unique_ptr<InstrProfReaderIndex> Index;
>    /// Iterator over the profile data.
>    InstrProfReaderIndex::data_iterator RecordIterator;
> -  /// Offset into our current data set.
> -  size_t CurrentOffset;
>    /// The file format version of the profile data.
>    uint64_t FormatVersion;
>    /// The maximal execution count among all functions.
> @@ -278,7 +248,7 @@
>    IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
>  public:
>    IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
> -      : DataBuffer(std::move(DataBuffer)), Index(nullptr), CurrentOffset(0) {}
> +      : DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
>  
>    /// Return true if the given buffer is in an indexed instrprof format.
>    static bool hasFormat(const MemoryBuffer &DataBuffer);
> Index: lib/ProfileData/InstrProfReader.cpp
> ===================================================================
> --- lib/ProfileData/InstrProfReader.cpp
> +++ lib/ProfileData/InstrProfReader.cpp
> @@ -15,7 +15,6 @@
>  #include "llvm/ProfileData/InstrProfReader.h"
>  #include "InstrProfIndexed.h"
>  #include "llvm/ADT/STLExtras.h"
> -#include "llvm/ProfileData/InstrProf.h"
>  #include <cassert>
>  
>  using namespace llvm;
> @@ -126,18 +125,16 @@
>      return error(instrprof_error::malformed);
>  
>    // Read each counter and fill our internal storage with the values.
> -  Counts.clear();
> -  Counts.reserve(NumCounters);
> +  Record.Counts.clear();
> +  Record.Counts.reserve(NumCounters);
>    for (uint64_t I = 0; I < NumCounters; ++I) {
>      if (Line.is_at_end())
>        return error(instrprof_error::truncated);
>      uint64_t Count;
>      if ((Line++)->getAsInteger(10, Count))
>        return error(instrprof_error::malformed);
> -    Counts.push_back(Count);
> +    Record.Counts.push_back(Count);
>    }
> -  // Give the record a reference to our internal counter storage.
> -  Record.Counts = Counts;
>  
>    return success();
>  }
> @@ -280,11 +277,10 @@
>    Record.Hash = swap(Data->FuncHash);
>    Record.Name = RawName;
>    if (ShouldSwapBytes) {
> -    Counts.clear();
> -    Counts.reserve(RawCounts.size());
> +    Record.Counts.clear();
> +    Record.Counts.reserve(RawCounts.size());
>      for (uint64_t Count : RawCounts)
> -      Counts.push_back(swap(Count));
> -    Record.Counts = Counts;
> +      Record.Counts.push_back(swap(Count));
>    } else
>      Record.Counts = RawCounts;
>  
> @@ -303,6 +299,49 @@
>    return IndexedInstrProf::ComputeHash(HashType, K);
>  }
>  
> +typedef InstrProfLookupTrait::data_type data_type;
> +typedef InstrProfLookupTrait::offset_type offset_type;
> +
> +data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
> +                                         offset_type N) {
> +
> +  // Check if the data is corrupt. If so, don't try to read it.
> +  if (N % sizeof(uint64_t))
> +    return data_type();
> +
> +  DataBuffer.clear();
> +  uint64_t NumCounts;
> +  uint64_t NumEntries = N / sizeof(uint64_t);
> +  std::vector<uint64_t> CounterBuffer;
> +  for (uint64_t I = 0; I < NumEntries; I += NumCounts) {
> +    using namespace support;
> +    // The function hash comes first.
> +    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
> +
> +    if (++I >= NumEntries)
> +      return data_type();
> +
> +    // In v1, we have at least one count.
> +    // Later, we have the number of counts.
> +    NumCounts = (1 == FormatVersion)
> +                    ? NumEntries - I
> +                    : endian::readNext<uint64_t, little, unaligned>(D);
> +    if (1 != FormatVersion)
> +      ++I;
> +
> +    // If we have more counts than data, this is bogus.
> +    if (I + NumCounts > NumEntries)
> +      return data_type();
> +
> +    CounterBuffer.clear();
> +    for (unsigned J = 0; J < NumCounts; ++J)
> +      CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
> +
> +    DataBuffer.push_back(InstrProfRecord(K, Hash, CounterBuffer));
> +  }
> +  return DataBuffer;
> +}
> +
>  bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
>    if (DataBuffer.getBufferSize() < 8)
>      return false;
> @@ -342,8 +381,9 @@
>    uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
>  
>    // The rest of the file is an on disk hash table.
> -  Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur, Start,
> -                                           InstrProfLookupTrait(HashType)));
> +  Index.reset(InstrProfReaderIndex::Create(
> +      Start + HashOffset, Cur, Start,
> +      InstrProfLookupTrait(HashType, FormatVersion)));
>    // Set up our iterator for readNextRecord.
>    RecordIterator = Index->data_begin();
>  
> @@ -357,21 +397,14 @@
>      return error(instrprof_error::unknown_function);
>  
>    // Found it. Look for counters with the right hash.
> -  ArrayRef<uint64_t> Data = (*Iter).Data;
> -  uint64_t NumCounts;
> -  for (uint64_t I = 0, E = Data.size(); I != E; I += NumCounts) {
> -    // The function hash comes first.
> -    uint64_t FoundHash = Data[I++];
> -    // In v1, we have at least one count. Later, we have the number of counts.
> -    if (I == E)
> -      return error(instrprof_error::malformed);
> -    NumCounts = FormatVersion == 1 ? E - I : Data[I++];
> -    // If we have more counts than data, this is bogus.
> -    if (I + NumCounts > E)
> -      return error(instrprof_error::malformed);
> +  ArrayRef<InstrProfRecord> Data = (*Iter);
> +  if (Data.empty())
> +    return error(instrprof_error::malformed);
> +
> +  for (unsigned I = 0, E = Data.size(); I < E; ++I) {
>      // Check for a match and fill the vector if there is one.
> -    if (FoundHash == FuncHash) {
> -      Counts = Data.slice(I, NumCounts);
> +    if (Data[I].Hash == FuncHash) {
> +      Counts = Data[I].Counts;
>        return success();
>      }
>    }
> @@ -384,30 +417,15 @@
>    if (RecordIterator == Index->data_end())
>      return error(instrprof_error::eof);
>  
> -  // Record the current function name.
> -  Record.Name = (*RecordIterator).Name;
> -
> -  ArrayRef<uint64_t> Data = (*RecordIterator).Data;
> -  // Valid data starts with a hash and either a count or the number of counts.
> -  if (CurrentOffset + 1 > Data.size())
> -    return error(instrprof_error::malformed);
> -  // First we have a function hash.
> -  Record.Hash = Data[CurrentOffset++];
> -  // In version 1 we knew the number of counters implicitly, but in newer
> -  // versions we store the number of counters next.
> -  uint64_t NumCounts =
> -      FormatVersion == 1 ? Data.size() - CurrentOffset : Data[CurrentOffset++];
> -  if (CurrentOffset + NumCounts > Data.size())
> +  if ((*RecordIterator).empty())
>      return error(instrprof_error::malformed);
> -  // And finally the counts themselves.
> -  Record.Counts = Data.slice(CurrentOffset, NumCounts);
>  
> -  // If we've exhausted this function's data, increment the record.
> -  CurrentOffset += NumCounts;
> -  if (CurrentOffset == Data.size()) {
> +  static unsigned RecordIndex = 0;
> +  ArrayRef<InstrProfRecord> Data = (*RecordIterator);
> +  Record = Data[RecordIndex++];
> +  if (RecordIndex >= Data.size()) {
>      ++RecordIterator;
> -    CurrentOffset = 0;
> +    RecordIndex = 0;
>    }
> -
>    return success();
>  }