[PATCH] Value profiling - patchset 2 - merge intended

Sat Jun 20 21:27:49 PDT 2015

>
>
> On Sat, Jun 20, 2015 at 8:04 PM, Betul Buyukkurt <betulb at codeaurora.org
> <mailto:betulb at codeaurora.org> > wrote:
>
>
>>
>>
>> On Sat, Jun 20, 2015 at 3:15 PM, Justin Bogner <mail at justinbogner.com
>> <mailto:mail at justinbogner.com>
>> <mailto:mail at justinbogner.com <mailto:mail at justinbogner.com> > > wrote:
>>
>>
>> Betul Buyukkurt <betulb at codeaurora.org <mailto:betulb at codeaurora.org>
>> <mailto:betulb at codeaurora.org <mailto:betulb at codeaurora.org> > >
>
>> writes:
>>> Included <cstdint> to InstrProf.h
>>>
>>>
>>> http://reviews.llvm.org/D10493
>>>
>>> Files:
>>>   include/llvm/ProfileData/InstrProf.h
>>>   include/llvm/ProfileData/InstrProfReader.h
>>>   lib/ProfileData/InstrProfReader.cpp
>>>
>>> EMAIL PREFERENCES
>>>   http://reviews.llvm.org/settings/panel/emailpreferences/
>>
>>> Index: include/llvm/ProfileData/InstrProf.h
>>> ===================================================================
>>> --- include/llvm/ProfileData/InstrProf.h
>>> +++ include/llvm/ProfileData/InstrProf.h
>>> @@ -16,7 +16,10 @@
>>>  #ifndef LLVM_PROFILEDATA_INSTRPROF_H_
>>>  #define LLVM_PROFILEDATA_INSTRPROF_H_
>>>
>>> +#include "llvm/ADT/StringRef.h"
>>> +#include <cstdint>
>>>  #include <system_error>
>>> +#include <vector>
>>>
>>>  namespace llvm {
>>>  const std::error_category &instrprof_category();
>>> @@ -41,6 +44,16 @@
>>>    return std::error_code(static_cast<int>(E), instrprof_category());
>>>  }
>>>
>>> +/// Profiling information for a single function.
>>> +struct InstrProfRecord {
>>> +  InstrProfRecord() {}
>>> +  InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t>
>>> &Counts)
>>
>> Please use an rvalue reference for Counts.
>>
>>
>>
>> Generally pass-by-value's the right tool in my experience. That way the
>> caller can decide whether to move into the operand or copy, depending on
>> their needs. The callee doesn't have to force the caller one way or
>> another.
>
>
> I totally agree. I did not understand what was meant by pass by value in
> the previous comment. I now understood the concern. I fixed it in my
> latest patch. Thanks for the comments.
>
>
>
> Looks like your current version is passing by rvalue reference:
>
>
> +  InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t>
> &&Counts)
> +      : Name(Name), Hash(Hash), Counts(Counts) {}
>
> What I was suggesting was to pass by value:
>
>
> +  InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t>
> Counts)
> +      : Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
>
> Works just as well, but lets callers /not/ pass ownership if they don't
> want
> to.

Then what I understood from your first comment was correct. I purposefully
wanted to avoid this to prevent copying of data around.

> Oh, and actually you forgot the std::move in the ctor init list (perhaps
> that's what you thought I meant by pass by value? Sorry for the
> confusion/lack of clarity)

I'm new to the rvalue references. I do think Counts(Counts) knowing what
it received is an rvalue reference would use the
"vector(vector&& other)" constructor, would it not?

>
>
> -Betul
>
>
>>
>>
>>> +      : Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
>>> +  StringRef Name;
>>> +  uint64_t Hash;
>>> +  std::vector<uint64_t> Counts;
>>> +};
>>> +
>>>  } // end namespace llvm
>>>
>>>  namespace std {
>>> Index: include/llvm/ProfileData/InstrProfReader.h
>>> ===================================================================
>>> --- include/llvm/ProfileData/InstrProfReader.h
>>> +++ include/llvm/ProfileData/InstrProfReader.h
>>> @@ -29,16 +29,6 @@
>>>
>>>  class InstrProfReader;
>>>
>>> -/// Profiling information for a single function.
>>> -struct InstrProfRecord {
>>> -  InstrProfRecord() {}
>>> -  InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t>
>>> Counts)
>>> -      : Name(Name), Hash(Hash), Counts(Counts) {}
>>> -  StringRef Name;
>>> -  uint64_t Hash;
>>> -  ArrayRef<uint64_t> Counts;
>>> -};
>>> -
>>>  /// A file format agnostic iterator over profiling data.
>>>  class InstrProfIterator : public
>>> std::iterator<std::input_iterator_tag,
>>>                                                 InstrProfRecord> {
>>> @@ -114,8 +104,6 @@
>>>    std::unique_ptr<MemoryBuffer> DataBuffer;
>>>    /// Iterator over the profile data.
>>>    line_iterator Line;
>>> -  /// The current set of counter values.
>>> -  std::vector<uint64_t> Counts;
>>>
>>>    TextInstrProfReader(const TextInstrProfReader &) = delete;
>>>    TextInstrProfReader &operator=(const TextInstrProfReader &) =
>>> delete;
>>> @@ -141,8 +129,6 @@
>>>  private:
>>>    /// The profile data file contents.
>>>    std::unique_ptr<MemoryBuffer> DataBuffer;
>>> -  /// The current set of counter values.
>>> -  std::vector<uint64_t> Counts;
>>>    struct ProfileData {
>>>      const uint32_t NameSize;
>>>      const uint32_t NumCounters;
>>
>>> @@ -206,17 +192,16 @@
>>>  /// Trait for lookups into the on-disk hash table for the binary
>>> instrprof
>>>  /// format.
>>>  class InstrProfLookupTrait {
>>> -  std::vector<uint64_t> DataBuffer;
>>> +  std::vector<InstrProfRecord> DataBuffer;
>>>    IndexedInstrProf::HashT HashType;
>>> +  unsigned FormatVersion;
>>> +
>>>  public:
>>> -  InstrProfLookupTrait(IndexedInstrProf::HashT HashType) :
>>> HashType(HashType) {}
>>> +  InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned
>>> FormatVersion)
>>> +      : HashType(HashType), FormatVersion(FormatVersion) {}
>>> +
>>> +  typedef ArrayRef<InstrProfRecord> data_type;
>>>
>>> -  struct data_type {
>>> -    data_type(StringRef Name, ArrayRef<uint64_t> Data)
>>> -        : Name(Name), Data(Data) {}
>>> -    StringRef Name;
>>> -    ArrayRef<uint64_t> Data;
>>> -  };
>>>    typedef StringRef internal_key_type;
>>>    typedef StringRef external_key_type;
>>>    typedef uint64_t hash_value_type;
>>> @@ -239,22 +224,9 @@
>>>      return StringRef((const char *)D, N);
>>>    }
>>>
>>> -  data_type ReadData(StringRef K, const unsigned char *D, offset_type
>>> N)
>>> {
>>> -    DataBuffer.clear();
>>> -    if (N % sizeof(uint64_t))
>>> -      // The data is corrupt, don't try to read it.
>>> -      return data_type("", DataBuffer);
>>> -
>>> -    using namespace support;
>>> -    // We just treat the data as opaque here. It's simpler to handle
>>> in
>>> -    // IndexedInstrProfReader.
>>> -    unsigned NumEntries = N / sizeof(uint64_t);
>>> -    DataBuffer.reserve(NumEntries);
>>> -    for (unsigned I = 0; I < NumEntries; ++I)
>>> -      DataBuffer.push_back(endian::readNext<uint64_t, little,
>>> unaligned>(D));
>>> -    return data_type(K, DataBuffer);
>>> -  }
>>> +  data_type ReadData(StringRef K, const unsigned char *D, offset_type
>>> N);
>>>  };
>>> +
>>>  typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
>>>      InstrProfReaderIndex;
>>>
>>> @@ -267,8 +239,6 @@
>>>    std::unique_ptr<InstrProfReaderIndex> Index;
>>>    /// Iterator over the profile data.
>>>    InstrProfReaderIndex::data_iterator RecordIterator;
>>> -  /// Offset into our current data set.
>>> -  size_t CurrentOffset;
>>>    /// The file format version of the profile data.
>>>    uint64_t FormatVersion;
>>>    /// The maximal execution count among all functions.
>>> @@ -278,7 +248,7 @@
>>
>>>    IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) =
>>> delete;
>>>  public:
>>>    IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
>>> -      : DataBuffer(std::move(DataBuffer)), Index(nullptr),
>>> CurrentOffset(0) {}
>>> +      : DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
>>>
>>>    /// Return true if the given buffer is in an indexed instrprof
>>> format.
>>>    static bool hasFormat(const MemoryBuffer &DataBuffer);
>>> Index: lib/ProfileData/InstrProfReader.cpp
>>> ===================================================================
>>> --- lib/ProfileData/InstrProfReader.cpp
>>> +++ lib/ProfileData/InstrProfReader.cpp
>>> @@ -15,7 +15,6 @@
>>>  #include "llvm/ProfileData/InstrProfReader.h"
>>>  #include "InstrProfIndexed.h"
>>>  #include "llvm/ADT/STLExtras.h"
>>> -#include "llvm/ProfileData/InstrProf.h"
>>>  #include <cassert>
>>>
>>>  using namespace llvm;
>>> @@ -126,18 +125,16 @@
>>>      return error(instrprof_error::malformed);
>>>
>>>    // Read each counter and fill our internal storage with the values.
>>> -  Counts.clear();
>>> -  Counts.reserve(NumCounters);
>>> +  Record.Counts.clear();
>>> +  Record.Counts.reserve(NumCounters);
>>>    for (uint64_t I = 0; I < NumCounters; ++I) {
>>>      if (Line.is_at_end())
>>>        return error(instrprof_error::truncated);
>>>      uint64_t Count;
>>>      if ((Line++)->getAsInteger(10, Count))
>>>        return error(instrprof_error::malformed);
>>> -    Counts.push_back(Count);
>>> +    Record.Counts.push_back(Count);
>>>    }
>>> -  // Give the record a reference to our internal counter storage.
>>> -  Record.Counts = Counts;
>>>
>>>    return success();
>>>  }
>>> @@ -280,11 +277,10 @@
>>>    Record.Hash = swap(Data->FuncHash);
>>>    Record.Name = RawName;
>>>    if (ShouldSwapBytes) {
>>> -    Counts.clear();
>>> -    Counts.reserve(RawCounts.size());
>>> +    Record.Counts.clear();
>>> +    Record.Counts.reserve(RawCounts.size());
>>>      for (uint64_t Count : RawCounts)
>>> -      Counts.push_back(swap(Count));
>>> -    Record.Counts = Counts;
>>> +      Record.Counts.push_back(swap(Count));
>>>    } else
>>>      Record.Counts = RawCounts;
>>>
>>
>>> @@ -303,6 +299,49 @@
>>>    return IndexedInstrProf::ComputeHash(HashType, K);
>>>  }
>>>
>>> +typedef InstrProfLookupTrait::data_type data_type;
>>> +typedef InstrProfLookupTrait::offset_type offset_type;
>>> +
>>> +data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned
>>> char
>>> *D,
>>
>>> +                                         offset_type N) {
>>> +
>>> +  // Check if the data is corrupt. If so, don't try to read it.
>>> +  if (N % sizeof(uint64_t))
>>> +    return data_type();
>>> +
>>> +  DataBuffer.clear();
>>> +  uint64_t NumCounts;
>>> +  uint64_t NumEntries = N / sizeof(uint64_t);
>>> +  std::vector<uint64_t> CounterBuffer;
>>> +  for (uint64_t I = 0; I < NumEntries; I += NumCounts) {
>>> +    using namespace support;
>>> +    // The function hash comes first.
>>> +    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
>>> +
>>> +    if (++I >= NumEntries)
>>> +      return data_type();
>>> +
>>> +    // In v1, we have at least one count.
>>> +    // Later, we have the number of counts.
>>> +    NumCounts = (1 == FormatVersion)
>>> +                    ? NumEntries - I
>>> +                    : endian::readNext<uint64_t, little,
>>> unaligned>(D);
>>> +    if (1 != FormatVersion)
>>> +      ++I;
>>> +
>>> +    // If we have more counts than data, this is bogus.
>>> +    if (I + NumCounts > NumEntries)
>>> +      return data_type();
>>> +
>>> +    CounterBuffer.clear();
>>> +    for (unsigned J = 0; J < NumCounts; ++J)
>>> +      CounterBuffer.push_back(endian::readNext<uint64_t, little,
>>> unaligned>(D));
>>> +
>>> +    DataBuffer.push_back(InstrProfRecord(K, Hash, CounterBuffer));
>>> +  }
>>> +  return DataBuffer;
>>> +}
>>> +
>>>  bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer)
>>> {
>>>    if (DataBuffer.getBufferSize() < 8)
>>>      return false;
>>
>>> @@ -342,8 +381,9 @@
>>>    uint64_t HashOffset = endian::readNext<uint64_t, little,
>>> unaligned>(Cur);
>>>
>>>    // The rest of the file is an on disk hash table.
>>> -  Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur,
>>> Start,
>>> -
>>> InstrProfLookupTrait(HashType)));
>>> +  Index.reset(InstrProfReaderIndex::Create(
>>> +      Start + HashOffset, Cur, Start,
>>> +      InstrProfLookupTrait(HashType, FormatVersion)));
>>>    // Set up our iterator for readNextRecord.
>>>    RecordIterator = Index->data_begin();
>>>
>>> @@ -357,21 +397,14 @@
>>
>>>      return error(instrprof_error::unknown_function);
>>>
>>>    // Found it. Look for counters with the right hash.
>>> -  ArrayRef<uint64_t> Data = (*Iter).Data;
>>> -  uint64_t NumCounts;
>>> -  for (uint64_t I = 0, E = Data.size(); I != E; I += NumCounts) {
>>> -    // The function hash comes first.
>>> -    uint64_t FoundHash = Data[I++];
>>> -    // In v1, we have at least one count. Later, we have the number of
>>> counts.
>>> -    if (I == E)
>>> -      return error(instrprof_error::malformed);
>>> -    NumCounts = FormatVersion == 1 ? E - I : Data[I++];
>>> -    // If we have more counts than data, this is bogus.
>>> -    if (I + NumCounts > E)
>>> -      return error(instrprof_error::malformed);
>>> +  ArrayRef<InstrProfRecord> Data = (*Iter);
>>> +  if (Data.empty())
>>> +    return error(instrprof_error::malformed);
>>> +
>>> +  for (unsigned I = 0, E = Data.size(); I < E; ++I) {
>>>      // Check for a match and fill the vector if there is one.
>>> -    if (FoundHash == FuncHash) {
>>> -      Counts = Data.slice(I, NumCounts);
>>> +    if (Data[I].Hash == FuncHash) {
>>> +      Counts = Data[I].Counts;
>>>        return success();
>>>      }
>>>    }
>>
>>> @@ -384,30 +417,15 @@
>>
>>>    if (RecordIterator == Index->data_end())
>>>      return error(instrprof_error::eof);
>>>
>>> -  // Record the current function name.
>>> -  Record.Name = (*RecordIterator).Name;
>>> -
>>> -  ArrayRef<uint64_t> Data = (*RecordIterator).Data;
>>> -  // Valid data starts with a hash and either a count or the number of
>>> counts.
>>> -  if (CurrentOffset + 1 > Data.size())
>>> -    return error(instrprof_error::malformed);
>>> -  // First we have a function hash.
>>> -  Record.Hash = Data[CurrentOffset++];
>>> -  // In version 1 we knew the number of counters implicitly, but in
>>> newer
>>> -  // versions we store the number of counters next.
>>> -  uint64_t NumCounts =
>>> -      FormatVersion == 1 ? Data.size() - CurrentOffset :
>>> Data[CurrentOffset++];
>>> -  if (CurrentOffset + NumCounts > Data.size())
>>> +  if ((*RecordIterator).empty())
>>>      return error(instrprof_error::malformed);
>>> -  // And finally the counts themselves.
>>> -  Record.Counts = Data.slice(CurrentOffset, NumCounts);
>>>
>>> -  // If we've exhausted this function's data, increment the record.
>>> -  CurrentOffset += NumCounts;
>>> -  if (CurrentOffset == Data.size()) {
>>> +  static unsigned RecordIndex = 0;
>>> +  ArrayRef<InstrProfRecord> Data = (*RecordIterator);
>>> +  Record = Data[RecordIndex++];
>>> +  if (RecordIndex >= Data.size()) {
>>>      ++RecordIterator;
>>> -    CurrentOffset = 0;
>>> +    RecordIndex = 0;
>>>    }
>>> -
>>>    return success();
>>>  }
>>
>> _______________________________________________
>> llvm-commits mailing list
>
>> llvm-commits at cs.uiuc.edu <mailto:llvm-commits at cs.uiuc.edu>
>> <mailto:llvm-commits at cs.uiuc.edu <mailto:llvm-commits at cs.uiuc.edu> >
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>>
>>
>>
>>
>
>
>
>
>
>