[PATCH] Value profiling - patchset 2 - merge intended

Mon Jun 22 15:50:08 PDT 2015

> David Blaikie <dblaikie at gmail.com> writes:
>> ================
>> Comment at: include/llvm/ProfileData/InstrProf.h:50-51
>> @@ +49,4 @@
>> +  InstrProfRecord() {}
>> +  InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t>
> Counts)
>> +      : Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
>> +  StringRef Name;
>> ----------------
>> Yep, this looks good to me now - thanks!
>>
>> I'll leave it to Justin to review the substance of the patch - I don't
>> have any knowledge here.
>
> LGTM too. I'll commit this for you, but I have one question first:
>
> Betul Buyukkurt <betulb at codeaurora.org> writes:
>> http://reviews.llvm.org/D10493
>>
>> Files:
>>   include/llvm/ProfileData/InstrProf.h
>>   include/llvm/ProfileData/InstrProfReader.h
>>   lib/ProfileData/InstrProfReader.cpp
>>   unittests/ProfileData/InstrProfTest.cpp
>>
>> EMAIL PREFERENCES
>>   http://reviews.llvm.org/settings/panel/emailpreferences/
>> Index: include/llvm/ProfileData/InstrProf.h
>> ===================================================================
>> --- include/llvm/ProfileData/InstrProf.h
>> +++ include/llvm/ProfileData/InstrProf.h
>> @@ -16,7 +16,10 @@
>>  #ifndef LLVM_PROFILEDATA_INSTRPROF_H_
>>  #define LLVM_PROFILEDATA_INSTRPROF_H_
>>
>> +#include "llvm/ADT/StringRef.h"
>> +#include <cstdint>
>>  #include <system_error>
>> +#include <vector>
>>
>>  namespace llvm {
>>  const std::error_category &instrprof_category();
>> @@ -41,6 +44,16 @@
>>    return std::error_code(static_cast<int>(E), instrprof_category());
>>  }
>>
>> +/// Profiling information for a single function.
>> +struct InstrProfRecord {
>> +  InstrProfRecord() {}
>> +  InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t>
> Counts)
>> +      : Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
>> +  StringRef Name;
>> +  uint64_t Hash;
>> +  std::vector<uint64_t> Counts;
>> +};
>> +
>>  } // end namespace llvm
>>
>>  namespace std {
>> Index: include/llvm/ProfileData/InstrProfReader.h
>> ===================================================================
>> --- include/llvm/ProfileData/InstrProfReader.h
>> +++ include/llvm/ProfileData/InstrProfReader.h
>> @@ -29,16 +29,6 @@
>>
>>  class InstrProfReader;
>>
>> -/// Profiling information for a single function.
>> -struct InstrProfRecord {
>> -  InstrProfRecord() {}
>> -  InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t>
> Counts)
>> -      : Name(Name), Hash(Hash), Counts(Counts) {}
>> -  StringRef Name;
>> -  uint64_t Hash;
>> -  ArrayRef<uint64_t> Counts;
>> -};
>> -
>>  /// A file format agnostic iterator over profiling data.
>>  class InstrProfIterator : public std::iterator<std::input_iterator_tag,
>>                                                 InstrProfRecord> {
>> @@ -114,8 +104,6 @@
>>    std::unique_ptr<MemoryBuffer> DataBuffer;
>>    /// Iterator over the profile data.
>>    line_iterator Line;
>> -  /// The current set of counter values.
>> -  std::vector<uint64_t> Counts;
>>
>>    TextInstrProfReader(const TextInstrProfReader &) = delete;
>>    TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
>> @@ -141,8 +129,6 @@
>>  private:
>>    /// The profile data file contents.
>>    std::unique_ptr<MemoryBuffer> DataBuffer;
>> -  /// The current set of counter values.
>> -  std::vector<uint64_t> Counts;
>>    struct ProfileData {
>>      const uint32_t NameSize;
>>      const uint32_t NumCounters;
>> @@ -206,17 +192,16 @@
>>  /// Trait for lookups into the on-disk hash table for the binary
> instrprof
>>  /// format.
>>  class InstrProfLookupTrait {
>> -  std::vector<uint64_t> DataBuffer;
>> +  std::vector<InstrProfRecord> DataBuffer;
>>    IndexedInstrProf::HashT HashType;
>> +  unsigned FormatVersion;
>> +
>>  public:
>> -  InstrProfLookupTrait(IndexedInstrProf::HashT HashType) :
> HashType(HashType) {}
>> +  InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned
> FormatVersion)
>> +      : HashType(HashType), FormatVersion(FormatVersion) {}
>> +
>> +  typedef ArrayRef<InstrProfRecord> data_type;
>>
>> -  struct data_type {
>> -    data_type(StringRef Name, ArrayRef<uint64_t> Data)
>> -        : Name(Name), Data(Data) {}
>> -    StringRef Name;
>> -    ArrayRef<uint64_t> Data;
>> -  };
>>    typedef StringRef internal_key_type;
>>    typedef StringRef external_key_type;
>>    typedef uint64_t hash_value_type;
>> @@ -239,22 +224,9 @@
>>      return StringRef((const char *)D, N);
>>    }
>>
>> -  data_type ReadData(StringRef K, const unsigned char *D, offset_type
> N) {
>> -    DataBuffer.clear();
>> -    if (N % sizeof(uint64_t))
>> -      // The data is corrupt, don't try to read it.
>> -      return data_type("", DataBuffer);
>> -
>> -    using namespace support;
>> -    // We just treat the data as opaque here. It's simpler to handle in
>> -    // IndexedInstrProfReader.
>> -    unsigned NumEntries = N / sizeof(uint64_t);
>> -    DataBuffer.reserve(NumEntries);
>> -    for (unsigned I = 0; I < NumEntries; ++I)
>> -      DataBuffer.push_back(endian::readNext<uint64_t, little,
> unaligned>(D));
>> -    return data_type(K, DataBuffer);
>> -  }
>> +  data_type ReadData(StringRef K, const unsigned char *D, offset_type
> N);
>>  };
>> +
>>  typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
>>      InstrProfReaderIndex;
>>
>> @@ -267,8 +239,6 @@
>>    std::unique_ptr<InstrProfReaderIndex> Index;
>>    /// Iterator over the profile data.
>>    InstrProfReaderIndex::data_iterator RecordIterator;
>> -  /// Offset into our current data set.
>> -  size_t CurrentOffset;
>>    /// The file format version of the profile data.
>>    uint64_t FormatVersion;
>>    /// The maximal execution count among all functions.
>> @@ -278,7 +248,7 @@
>>    IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) =
> delete;
>>  public:
>>    IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
>> -      : DataBuffer(std::move(DataBuffer)), Index(nullptr),
> CurrentOffset(0) {}
>> +      : DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
>>
>>    /// Return true if the given buffer is in an indexed instrprof
> format.
>>    static bool hasFormat(const MemoryBuffer &DataBuffer);
>> Index: lib/ProfileData/InstrProfReader.cpp
>> ===================================================================
>> --- lib/ProfileData/InstrProfReader.cpp
>> +++ lib/ProfileData/InstrProfReader.cpp
>> @@ -15,7 +15,6 @@
>>  #include "llvm/ProfileData/InstrProfReader.h"
>>  #include "InstrProfIndexed.h"
>>  #include "llvm/ADT/STLExtras.h"
>> -#include "llvm/ProfileData/InstrProf.h"
>>  #include <cassert>
>>
>>  using namespace llvm;
>> @@ -126,18 +125,16 @@
>>      return error(instrprof_error::malformed);
>>
>>    // Read each counter and fill our internal storage with the values.
>> -  Counts.clear();
>> -  Counts.reserve(NumCounters);
>> +  Record.Counts.clear();
>> +  Record.Counts.reserve(NumCounters);
>>    for (uint64_t I = 0; I < NumCounters; ++I) {
>>      if (Line.is_at_end())
>>        return error(instrprof_error::truncated);
>>      uint64_t Count;
>>      if ((Line++)->getAsInteger(10, Count))
>>        return error(instrprof_error::malformed);
>> -    Counts.push_back(Count);
>> +    Record.Counts.push_back(Count);
>>    }
>> -  // Give the record a reference to our internal counter storage.
>> -  Record.Counts = Counts;
>>
>>    return success();
>>  }
>> @@ -280,11 +277,10 @@
>>    Record.Hash = swap(Data->FuncHash);
>>    Record.Name = RawName;
>>    if (ShouldSwapBytes) {
>> -    Counts.clear();
>> -    Counts.reserve(RawCounts.size());
>> +    Record.Counts.clear();
>> +    Record.Counts.reserve(RawCounts.size());
>>      for (uint64_t Count : RawCounts)
>> -      Counts.push_back(swap(Count));
>> -    Record.Counts = Counts;
>> +      Record.Counts.push_back(swap(Count));
>>    } else
>>      Record.Counts = RawCounts;
>>
>> @@ -303,6 +299,49 @@
>>    return IndexedInstrProf::ComputeHash(HashType, K);
>>  }
>>
>> +typedef InstrProfLookupTrait::data_type data_type;
>> +typedef InstrProfLookupTrait::offset_type offset_type;
>> +
>> +data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned
> char *D,
>> +                                         offset_type N) {
>> +
>> +  // Check if the data is corrupt. If so, don't try to read it.
>> +  if (N % sizeof(uint64_t))
>> +    return data_type();
>> +
>> +  DataBuffer.clear();
>> +  uint64_t NumCounts;
>> +  uint64_t NumEntries = N / sizeof(uint64_t);
>> +  std::vector<uint64_t> CounterBuffer;
>> +  for (uint64_t I = 0; I < NumEntries; I += NumCounts) {
>> +    using namespace support;
>> +    // The function hash comes first.
>> +    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
>> +
>> +    if (++I >= NumEntries)
>> +      return data_type();
>> +
>> +    // In v1, we have at least one count.
>> +    // Later, we have the number of counts.
>> +    NumCounts = (1 == FormatVersion)
>> +                    ? NumEntries - I
>> +                    : endian::readNext<uint64_t, little, unaligned>(D);
>> +    if (1 != FormatVersion)
>> +      ++I;
>> +
>> +    // If we have more counts than data, this is bogus.
>> +    if (I + NumCounts > NumEntries)
>> +      return data_type();
>> +
>> +    CounterBuffer.clear();
>> +    for (unsigned J = 0; J < NumCounts; ++J)
>> +      CounterBuffer.push_back(endian::readNext<uint64_t, little,
> unaligned>(D));
>> +
>> +    DataBuffer.push_back(InstrProfRecord(K, Hash,
> std::move(CounterBuffer)));
>> +  }
>> +  return DataBuffer;
>> +}
>> +
>>  bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer)
> {
>>    if (DataBuffer.getBufferSize() < 8)
>>      return false;
>> @@ -342,8 +381,9 @@
>>    uint64_t HashOffset = endian::readNext<uint64_t, little,
> unaligned>(Cur);
>>
>>    // The rest of the file is an on disk hash table.
>> -  Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur,
> Start,
>> -
> InstrProfLookupTrait(HashType)));
>> +  Index.reset(InstrProfReaderIndex::Create(
>> +      Start + HashOffset, Cur, Start,
>> +      InstrProfLookupTrait(HashType, FormatVersion)));
>>    // Set up our iterator for readNextRecord.
>>    RecordIterator = Index->data_begin();
>>
>> @@ -357,21 +397,14 @@
>>      return error(instrprof_error::unknown_function);
>>
>>    // Found it. Look for counters with the right hash.
>> -  ArrayRef<uint64_t> Data = (*Iter).Data;
>> -  uint64_t NumCounts;
>> -  for (uint64_t I = 0, E = Data.size(); I != E; I += NumCounts) {
>> -    // The function hash comes first.
>> -    uint64_t FoundHash = Data[I++];
>> -    // In v1, we have at least one count. Later, we have the number of
> counts.
>> -    if (I == E)
>> -      return error(instrprof_error::malformed);
>> -    NumCounts = FormatVersion == 1 ? E - I : Data[I++];
>> -    // If we have more counts than data, this is bogus.
>> -    if (I + NumCounts > E)
>> -      return error(instrprof_error::malformed);
>> +  ArrayRef<InstrProfRecord> Data = (*Iter);
>> +  if (Data.empty())
>> +    return error(instrprof_error::malformed);
>> +
>> +  for (unsigned I = 0, E = Data.size(); I < E; ++I) {
>>      // Check for a match and fill the vector if there is one.
>> -    if (FoundHash == FuncHash) {
>> -      Counts = Data.slice(I, NumCounts);
>> +    if (Data[I].Hash == FuncHash) {
>> +      Counts = Data[I].Counts;
>>        return success();
>>      }
>>    }
>> @@ -384,30 +417,15 @@
>>    if (RecordIterator == Index->data_end())
>>      return error(instrprof_error::eof);
>>
>> -  // Record the current function name.
>> -  Record.Name = (*RecordIterator).Name;
>> -
>> -  ArrayRef<uint64_t> Data = (*RecordIterator).Data;
>> -  // Valid data starts with a hash and either a count or the number of
> counts.
>> -  if (CurrentOffset + 1 > Data.size())
>> -    return error(instrprof_error::malformed);
>> -  // First we have a function hash.
>> -  Record.Hash = Data[CurrentOffset++];
>> -  // In version 1 we knew the number of counters implicitly, but in
> newer
>> -  // versions we store the number of counters next.
>> -  uint64_t NumCounts =
>> -      FormatVersion == 1 ? Data.size() - CurrentOffset :
> Data[CurrentOffset++];
>> -  if (CurrentOffset + NumCounts > Data.size())
>> +  if ((*RecordIterator).empty())
>>      return error(instrprof_error::malformed);
>> -  // And finally the counts themselves.
>> -  Record.Counts = Data.slice(CurrentOffset, NumCounts);
>>
>> -  // If we've exhausted this function's data, increment the record.
>> -  CurrentOffset += NumCounts;
>> -  if (CurrentOffset == Data.size()) {
>> +  static unsigned RecordIndex = 0;
>> +  ArrayRef<InstrProfRecord> Data = (*RecordIterator);
>> +  Record = Data[RecordIndex++];
>> +  if (RecordIndex >= Data.size()) {
>>      ++RecordIterator;
>> -    CurrentOffset = 0;
>> +    RecordIndex = 0;
>>    }
>> -
>>    return success();
>>  }
>> Index: unittests/ProfileData/InstrProfTest.cpp
>> ===================================================================
>> --- unittests/ProfileData/InstrProfTest.cpp
>> +++ unittests/ProfileData/InstrProfTest.cpp
>> @@ -68,6 +68,7 @@
>>
>>  TEST_F(InstrProfTest, get_function_counts) {
>>    Writer.addFunctionCounts("foo", 0x1234, {1, 2});
>> +  Writer.addFunctionCounts("foo", 0x1235, {3, 4});
>>    auto Profile = Writer.writeBuffer();
>>    readProfile(std::move(Profile));
>>
>> @@ -77,6 +78,11 @@
>>    ASSERT_EQ(1U, Counts[0]);
>>    ASSERT_EQ(2U, Counts[1]);
>>
>> +  ASSERT_TRUE(NoError(Reader->getFunctionCounts("foo", 0x1235,
> Counts)));
>> +  ASSERT_EQ(2U, Counts.size());
>> +  ASSERT_EQ(3U, Counts[0]);
>> +  ASSERT_EQ(4U, Counts[1]);
>> +
>
> Is this new test related to this change, or is it part of something else
> that you added by accident? It seems unrelated to me, but maybe I'm
> missing something.

It was intentional. In the indexed readers the previous and current data
structures/code is put together to handle the case of same name different
hash values, having different set of counters. I've seen no tests for it
so I added here.

-Betul

>>    std::error_code EC;
>>    EC = Reader->getFunctionCounts("foo", 0x5678, Counts);
>>    ASSERT_TRUE(ErrorEquals(instrprof_error::hash_mismatch, EC));
>
>