[PATCH] Value profiling - patchset 2 - merge intended

Mon Jun 22 17:05:24 PDT 2015

"Betul Buyukkurt" <betulb at codeaurora.org> writes:
>> David Blaikie <dblaikie at gmail.com> writes:
>>> ================
>>> Comment at: include/llvm/ProfileData/InstrProf.h:50-51
>>> @@ +49,4 @@
>>> +  InstrProfRecord() {}
>>> +  InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t>
>> Counts)
>>> +      : Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
>>> +  StringRef Name;
>>> ----------------
>>> Yep, this looks good to me now - thanks!
>>>
>>> I'll leave it to Justin to review the substance of the patch - I don't
>>> have any knowledge here.
>>
>> LGTM too. I'll commit this for you, but I have one question first:
>>
>> Betul Buyukkurt <betulb at codeaurora.org> writes:
>>> http://reviews.llvm.org/D10493
>>>
>>> Files:
>>>   include/llvm/ProfileData/InstrProf.h
>>>   include/llvm/ProfileData/InstrProfReader.h
>>>   lib/ProfileData/InstrProfReader.cpp
>>>   unittests/ProfileData/InstrProfTest.cpp
>>>
>>> EMAIL PREFERENCES
>>>   http://reviews.llvm.org/settings/panel/emailpreferences/
>>> Index: include/llvm/ProfileData/InstrProf.h
>>> ===================================================================
>>> --- include/llvm/ProfileData/InstrProf.h
>>> +++ include/llvm/ProfileData/InstrProf.h
>>> @@ -16,7 +16,10 @@
>>>  #ifndef LLVM_PROFILEDATA_INSTRPROF_H_
>>>  #define LLVM_PROFILEDATA_INSTRPROF_H_
>>>
>>> +#include "llvm/ADT/StringRef.h"
>>> +#include <cstdint>
>>>  #include <system_error>
>>> +#include <vector>
>>>
>>>  namespace llvm {
>>>  const std::error_category &instrprof_category();
>>> @@ -41,6 +44,16 @@
>>>    return std::error_code(static_cast<int>(E), instrprof_category());
>>>  }
>>>
>>> +/// Profiling information for a single function.
>>> +struct InstrProfRecord {
>>> +  InstrProfRecord() {}
>>> +  InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t>
>> Counts)
>>> +      : Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
>>> +  StringRef Name;
>>> +  uint64_t Hash;
>>> +  std::vector<uint64_t> Counts;
>>> +};
>>> +
>>>  } // end namespace llvm
>>>
>>>  namespace std {
>>> Index: include/llvm/ProfileData/InstrProfReader.h
>>> ===================================================================
>>> --- include/llvm/ProfileData/InstrProfReader.h
>>> +++ include/llvm/ProfileData/InstrProfReader.h
>>> @@ -29,16 +29,6 @@
>>>
>>>  class InstrProfReader;
>>>
>>> -/// Profiling information for a single function.
>>> -struct InstrProfRecord {
>>> -  InstrProfRecord() {}
>>> -  InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t>
>> Counts)
>>> -      : Name(Name), Hash(Hash), Counts(Counts) {}
>>> -  StringRef Name;
>>> -  uint64_t Hash;
>>> -  ArrayRef<uint64_t> Counts;
>>> -};
>>> -
>>>  /// A file format agnostic iterator over profiling data.
>>>  class InstrProfIterator : public std::iterator<std::input_iterator_tag,
>>>                                                 InstrProfRecord> {
>>> @@ -114,8 +104,6 @@
>>>    std::unique_ptr<MemoryBuffer> DataBuffer;
>>>    /// Iterator over the profile data.
>>>    line_iterator Line;
>>> -  /// The current set of counter values.
>>> -  std::vector<uint64_t> Counts;
>>>
>>>    TextInstrProfReader(const TextInstrProfReader &) = delete;
>>>    TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
>>> @@ -141,8 +129,6 @@
>>>  private:
>>>    /// The profile data file contents.
>>>    std::unique_ptr<MemoryBuffer> DataBuffer;
>>> -  /// The current set of counter values.
>>> -  std::vector<uint64_t> Counts;
>>>    struct ProfileData {
>>>      const uint32_t NameSize;
>>>      const uint32_t NumCounters;
>>> @@ -206,17 +192,16 @@
>>>  /// Trait for lookups into the on-disk hash table for the binary
>> instrprof
>>>  /// format.
>>>  class InstrProfLookupTrait {
>>> -  std::vector<uint64_t> DataBuffer;
>>> +  std::vector<InstrProfRecord> DataBuffer;
>>>    IndexedInstrProf::HashT HashType;
>>> +  unsigned FormatVersion;
>>> +
>>>  public:
>>> -  InstrProfLookupTrait(IndexedInstrProf::HashT HashType) :
>> HashType(HashType) {}
>>> +  InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned
>> FormatVersion)
>>> +      : HashType(HashType), FormatVersion(FormatVersion) {}
>>> +
>>> +  typedef ArrayRef<InstrProfRecord> data_type;
>>>
>>> -  struct data_type {
>>> -    data_type(StringRef Name, ArrayRef<uint64_t> Data)
>>> -        : Name(Name), Data(Data) {}
>>> -    StringRef Name;
>>> -    ArrayRef<uint64_t> Data;
>>> -  };
>>>    typedef StringRef internal_key_type;
>>>    typedef StringRef external_key_type;
>>>    typedef uint64_t hash_value_type;
>>> @@ -239,22 +224,9 @@
>>>      return StringRef((const char *)D, N);
>>>    }
>>>
>>> -  data_type ReadData(StringRef K, const unsigned char *D, offset_type
>> N) {
>>> -    DataBuffer.clear();
>>> -    if (N % sizeof(uint64_t))
>>> -      // The data is corrupt, don't try to read it.
>>> -      return data_type("", DataBuffer);
>>> -
>>> -    using namespace support;
>>> -    // We just treat the data as opaque here. It's simpler to handle in
>>> -    // IndexedInstrProfReader.
>>> -    unsigned NumEntries = N / sizeof(uint64_t);
>>> -    DataBuffer.reserve(NumEntries);
>>> -    for (unsigned I = 0; I < NumEntries; ++I)
>>> -      DataBuffer.push_back(endian::readNext<uint64_t, little,
>> unaligned>(D));
>>> -    return data_type(K, DataBuffer);
>>> -  }
>>> +  data_type ReadData(StringRef K, const unsigned char *D, offset_type
>> N);
>>>  };
>>> +
>>>  typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
>>>      InstrProfReaderIndex;
>>>
>>> @@ -267,8 +239,6 @@
>>>    std::unique_ptr<InstrProfReaderIndex> Index;
>>>    /// Iterator over the profile data.
>>>    InstrProfReaderIndex::data_iterator RecordIterator;
>>> -  /// Offset into our current data set.
>>> -  size_t CurrentOffset;
>>>    /// The file format version of the profile data.
>>>    uint64_t FormatVersion;
>>>    /// The maximal execution count among all functions.
>>> @@ -278,7 +248,7 @@
>>>    IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) =
>> delete;
>>>  public:
>>>    IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
>>> -      : DataBuffer(std::move(DataBuffer)), Index(nullptr),
>> CurrentOffset(0) {}
>>> +      : DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
>>>
>>>    /// Return true if the given buffer is in an indexed instrprof
>> format.
>>>    static bool hasFormat(const MemoryBuffer &DataBuffer);
>>> Index: lib/ProfileData/InstrProfReader.cpp
>>> ===================================================================
>>> --- lib/ProfileData/InstrProfReader.cpp
>>> +++ lib/ProfileData/InstrProfReader.cpp
>>> @@ -15,7 +15,6 @@
>>>  #include "llvm/ProfileData/InstrProfReader.h"
>>>  #include "InstrProfIndexed.h"
>>>  #include "llvm/ADT/STLExtras.h"
>>> -#include "llvm/ProfileData/InstrProf.h"
>>>  #include <cassert>
>>>
>>>  using namespace llvm;
>>> @@ -126,18 +125,16 @@
>>>      return error(instrprof_error::malformed);
>>>
>>>    // Read each counter and fill our internal storage with the values.
>>> -  Counts.clear();
>>> -  Counts.reserve(NumCounters);
>>> +  Record.Counts.clear();
>>> +  Record.Counts.reserve(NumCounters);
>>>    for (uint64_t I = 0; I < NumCounters; ++I) {
>>>      if (Line.is_at_end())
>>>        return error(instrprof_error::truncated);
>>>      uint64_t Count;
>>>      if ((Line++)->getAsInteger(10, Count))
>>>        return error(instrprof_error::malformed);
>>> -    Counts.push_back(Count);
>>> +    Record.Counts.push_back(Count);
>>>    }
>>> -  // Give the record a reference to our internal counter storage.
>>> -  Record.Counts = Counts;
>>>
>>>    return success();
>>>  }
>>> @@ -280,11 +277,10 @@
>>>    Record.Hash = swap(Data->FuncHash);
>>>    Record.Name = RawName;
>>>    if (ShouldSwapBytes) {
>>> -    Counts.clear();
>>> -    Counts.reserve(RawCounts.size());
>>> +    Record.Counts.clear();
>>> +    Record.Counts.reserve(RawCounts.size());
>>>      for (uint64_t Count : RawCounts)
>>> -      Counts.push_back(swap(Count));
>>> -    Record.Counts = Counts;
>>> +      Record.Counts.push_back(swap(Count));
>>>    } else
>>>      Record.Counts = RawCounts;
>>>
>>> @@ -303,6 +299,49 @@
>>>    return IndexedInstrProf::ComputeHash(HashType, K);
>>>  }
>>>
>>> +typedef InstrProfLookupTrait::data_type data_type;
>>> +typedef InstrProfLookupTrait::offset_type offset_type;
>>> +
>>> +data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned
>> char *D,
>>> +                                         offset_type N) {
>>> +
>>> +  // Check if the data is corrupt. If so, don't try to read it.
>>> +  if (N % sizeof(uint64_t))
>>> +    return data_type();
>>> +
>>> +  DataBuffer.clear();
>>> +  uint64_t NumCounts;
>>> +  uint64_t NumEntries = N / sizeof(uint64_t);
>>> +  std::vector<uint64_t> CounterBuffer;
>>> +  for (uint64_t I = 0; I < NumEntries; I += NumCounts) {
>>> +    using namespace support;
>>> +    // The function hash comes first.
>>> +    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
>>> +
>>> +    if (++I >= NumEntries)
>>> +      return data_type();
>>> +
>>> +    // In v1, we have at least one count.
>>> +    // Later, we have the number of counts.
>>> +    NumCounts = (1 == FormatVersion)
>>> +                    ? NumEntries - I
>>> +                    : endian::readNext<uint64_t, little, unaligned>(D);
>>> +    if (1 != FormatVersion)
>>> +      ++I;
>>> +
>>> +    // If we have more counts than data, this is bogus.
>>> +    if (I + NumCounts > NumEntries)
>>> +      return data_type();
>>> +
>>> +    CounterBuffer.clear();
>>> +    for (unsigned J = 0; J < NumCounts; ++J)
>>> +      CounterBuffer.push_back(endian::readNext<uint64_t, little,
>> unaligned>(D));
>>> +
>>> +    DataBuffer.push_back(InstrProfRecord(K, Hash,
>> std::move(CounterBuffer)));
>>> +  }
>>> +  return DataBuffer;
>>> +}
>>> +
>>>  bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer)
>> {
>>>    if (DataBuffer.getBufferSize() < 8)
>>>      return false;
>>> @@ -342,8 +381,9 @@
>>>    uint64_t HashOffset = endian::readNext<uint64_t, little,
>> unaligned>(Cur);
>>>
>>>    // The rest of the file is an on disk hash table.
>>> -  Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur,
>> Start,
>>> -
>> InstrProfLookupTrait(HashType)));
>>> +  Index.reset(InstrProfReaderIndex::Create(
>>> +      Start + HashOffset, Cur, Start,
>>> +      InstrProfLookupTrait(HashType, FormatVersion)));
>>>    // Set up our iterator for readNextRecord.
>>>    RecordIterator = Index->data_begin();
>>>
>>> @@ -357,21 +397,14 @@
>>>      return error(instrprof_error::unknown_function);
>>>
>>>    // Found it. Look for counters with the right hash.
>>> -  ArrayRef<uint64_t> Data = (*Iter).Data;
>>> -  uint64_t NumCounts;
>>> -  for (uint64_t I = 0, E = Data.size(); I != E; I += NumCounts) {
>>> -    // The function hash comes first.
>>> -    uint64_t FoundHash = Data[I++];
>>> -    // In v1, we have at least one count. Later, we have the number of
>> counts.
>>> -    if (I == E)
>>> -      return error(instrprof_error::malformed);
>>> -    NumCounts = FormatVersion == 1 ? E - I : Data[I++];
>>> -    // If we have more counts than data, this is bogus.
>>> -    if (I + NumCounts > E)
>>> -      return error(instrprof_error::malformed);
>>> +  ArrayRef<InstrProfRecord> Data = (*Iter);
>>> +  if (Data.empty())
>>> +    return error(instrprof_error::malformed);
>>> +
>>> +  for (unsigned I = 0, E = Data.size(); I < E; ++I) {
>>>      // Check for a match and fill the vector if there is one.
>>> -    if (FoundHash == FuncHash) {
>>> -      Counts = Data.slice(I, NumCounts);
>>> +    if (Data[I].Hash == FuncHash) {
>>> +      Counts = Data[I].Counts;
>>>        return success();
>>>      }
>>>    }
>>> @@ -384,30 +417,15 @@
>>>    if (RecordIterator == Index->data_end())
>>>      return error(instrprof_error::eof);
>>>
>>> -  // Record the current function name.
>>> -  Record.Name = (*RecordIterator).Name;
>>> -
>>> -  ArrayRef<uint64_t> Data = (*RecordIterator).Data;
>>> -  // Valid data starts with a hash and either a count or the number of
>> counts.
>>> -  if (CurrentOffset + 1 > Data.size())
>>> -    return error(instrprof_error::malformed);
>>> -  // First we have a function hash.
>>> -  Record.Hash = Data[CurrentOffset++];
>>> -  // In version 1 we knew the number of counters implicitly, but in
>> newer
>>> -  // versions we store the number of counters next.
>>> -  uint64_t NumCounts =
>>> -      FormatVersion == 1 ? Data.size() - CurrentOffset :
>> Data[CurrentOffset++];
>>> -  if (CurrentOffset + NumCounts > Data.size())
>>> +  if ((*RecordIterator).empty())
>>>      return error(instrprof_error::malformed);
>>> -  // And finally the counts themselves.
>>> -  Record.Counts = Data.slice(CurrentOffset, NumCounts);
>>>
>>> -  // If we've exhausted this function's data, increment the record.
>>> -  CurrentOffset += NumCounts;
>>> -  if (CurrentOffset == Data.size()) {
>>> +  static unsigned RecordIndex = 0;
>>> +  ArrayRef<InstrProfRecord> Data = (*RecordIterator);
>>> +  Record = Data[RecordIndex++];
>>> +  if (RecordIndex >= Data.size()) {
>>>      ++RecordIterator;
>>> -    CurrentOffset = 0;
>>> +    RecordIndex = 0;
>>>    }
>>> -
>>>    return success();
>>>  }
>>> Index: unittests/ProfileData/InstrProfTest.cpp
>>> ===================================================================
>>> --- unittests/ProfileData/InstrProfTest.cpp
>>> +++ unittests/ProfileData/InstrProfTest.cpp
>>> @@ -68,6 +68,7 @@
>>>
>>>  TEST_F(InstrProfTest, get_function_counts) {
>>>    Writer.addFunctionCounts("foo", 0x1234, {1, 2});
>>> +  Writer.addFunctionCounts("foo", 0x1235, {3, 4});
>>>    auto Profile = Writer.writeBuffer();
>>>    readProfile(std::move(Profile));
>>>
>>> @@ -77,6 +78,11 @@
>>>    ASSERT_EQ(1U, Counts[0]);
>>>    ASSERT_EQ(2U, Counts[1]);
>>>
>>> +  ASSERT_TRUE(NoError(Reader->getFunctionCounts("foo", 0x1235,
>> Counts)));
>>> +  ASSERT_EQ(2U, Counts.size());
>>> +  ASSERT_EQ(3U, Counts[0]);
>>> +  ASSERT_EQ(4U, Counts[1]);
>>> +
>>
>> Is this new test related to this change, or is it part of something else
>> that you added by accident? It seems unrelated to me, but maybe I'm
>> missing something.
>
> It was intentional. In the indexed readers the previous and current data
> structures/code is put together to handle the case of same name different
> hash values, having different set of counters. I've seen no tests for it
> so I added here.

Okay - that should be a separate commit. Including test changes with
code changes makes it look like the code change fixed the test, so it's
harder to tell what's going on if they aren't related.

I've committed the test update for you in r240359, and the rest of the
patch in r240360. Thanks!

>
> -Betul
>
>>>    std::error_code EC;
>>>    EC = Reader->getFunctionCounts("foo", 0x5678, Counts);
>>>    ASSERT_TRUE(ErrorEquals(instrprof_error::hash_mismatch, EC));
>>
>>