[PATCH] Value profiling - patchset 2 - merge intended
Betul Buyukkurt
betulb at codeaurora.org
Mon Jun 22 15:50:08 PDT 2015
> David Blaikie <dblaikie at gmail.com> writes:
>> ================
>> Comment at: include/llvm/ProfileData/InstrProf.h:50-51
>> @@ +49,4 @@
>> + InstrProfRecord() {}
>> + InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t>
> Counts)
>> + : Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
>> + StringRef Name;
>> ----------------
>> Yep, this looks good to me now - thanks!
>>
>> I'll leave it to Justin to review the substance of the patch - I don't
>> have any knowledge here.
>
> LGTM too. I'll commit this for you, but I have one question first:
>
> Betul Buyukkurt <betulb at codeaurora.org> writes:
>> http://reviews.llvm.org/D10493
>>
>> Files:
>> include/llvm/ProfileData/InstrProf.h
>> include/llvm/ProfileData/InstrProfReader.h
>> lib/ProfileData/InstrProfReader.cpp
>> unittests/ProfileData/InstrProfTest.cpp
>>
>> EMAIL PREFERENCES
>> http://reviews.llvm.org/settings/panel/emailpreferences/
>> Index: include/llvm/ProfileData/InstrProf.h
>> ===================================================================
>> --- include/llvm/ProfileData/InstrProf.h
>> +++ include/llvm/ProfileData/InstrProf.h
>> @@ -16,7 +16,10 @@
>> #ifndef LLVM_PROFILEDATA_INSTRPROF_H_
>> #define LLVM_PROFILEDATA_INSTRPROF_H_
>>
>> +#include "llvm/ADT/StringRef.h"
>> +#include <cstdint>
>> #include <system_error>
>> +#include <vector>
>>
>> namespace llvm {
>> const std::error_category &instrprof_category();
>> @@ -41,6 +44,16 @@
>> return std::error_code(static_cast<int>(E), instrprof_category());
>> }
>>
>> +/// Profiling information for a single function.
>> +struct InstrProfRecord {
>> + InstrProfRecord() {}
>> + InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t>
> Counts)
>> + : Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
>> + StringRef Name;
>> + uint64_t Hash;
>> + std::vector<uint64_t> Counts;
>> +};
>> +
>> } // end namespace llvm
>>
>> namespace std {
>> Index: include/llvm/ProfileData/InstrProfReader.h
>> ===================================================================
>> --- include/llvm/ProfileData/InstrProfReader.h
>> +++ include/llvm/ProfileData/InstrProfReader.h
>> @@ -29,16 +29,6 @@
>>
>> class InstrProfReader;
>>
>> -/// Profiling information for a single function.
>> -struct InstrProfRecord {
>> - InstrProfRecord() {}
>> - InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t>
> Counts)
>> - : Name(Name), Hash(Hash), Counts(Counts) {}
>> - StringRef Name;
>> - uint64_t Hash;
>> - ArrayRef<uint64_t> Counts;
>> -};
>> -
>> /// A file format agnostic iterator over profiling data.
>> class InstrProfIterator : public std::iterator<std::input_iterator_tag,
>> InstrProfRecord> {
>> @@ -114,8 +104,6 @@
>> std::unique_ptr<MemoryBuffer> DataBuffer;
>> /// Iterator over the profile data.
>> line_iterator Line;
>> - /// The current set of counter values.
>> - std::vector<uint64_t> Counts;
>>
>> TextInstrProfReader(const TextInstrProfReader &) = delete;
>> TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
>> @@ -141,8 +129,6 @@
>> private:
>> /// The profile data file contents.
>> std::unique_ptr<MemoryBuffer> DataBuffer;
>> - /// The current set of counter values.
>> - std::vector<uint64_t> Counts;
>> struct ProfileData {
>> const uint32_t NameSize;
>> const uint32_t NumCounters;
>> @@ -206,17 +192,16 @@
>> /// Trait for lookups into the on-disk hash table for the binary
> instrprof
>> /// format.
>> class InstrProfLookupTrait {
>> - std::vector<uint64_t> DataBuffer;
>> + std::vector<InstrProfRecord> DataBuffer;
>> IndexedInstrProf::HashT HashType;
>> + unsigned FormatVersion;
>> +
>> public:
>> - InstrProfLookupTrait(IndexedInstrProf::HashT HashType) :
> HashType(HashType) {}
>> + InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned
> FormatVersion)
>> + : HashType(HashType), FormatVersion(FormatVersion) {}
>> +
>> + typedef ArrayRef<InstrProfRecord> data_type;
>>
>> - struct data_type {
>> - data_type(StringRef Name, ArrayRef<uint64_t> Data)
>> - : Name(Name), Data(Data) {}
>> - StringRef Name;
>> - ArrayRef<uint64_t> Data;
>> - };
>> typedef StringRef internal_key_type;
>> typedef StringRef external_key_type;
>> typedef uint64_t hash_value_type;
>> @@ -239,22 +224,9 @@
>> return StringRef((const char *)D, N);
>> }
>>
>> - data_type ReadData(StringRef K, const unsigned char *D, offset_type
> N) {
>> - DataBuffer.clear();
>> - if (N % sizeof(uint64_t))
>> - // The data is corrupt, don't try to read it.
>> - return data_type("", DataBuffer);
>> -
>> - using namespace support;
>> - // We just treat the data as opaque here. It's simpler to handle in
>> - // IndexedInstrProfReader.
>> - unsigned NumEntries = N / sizeof(uint64_t);
>> - DataBuffer.reserve(NumEntries);
>> - for (unsigned I = 0; I < NumEntries; ++I)
>> - DataBuffer.push_back(endian::readNext<uint64_t, little,
> unaligned>(D));
>> - return data_type(K, DataBuffer);
>> - }
>> + data_type ReadData(StringRef K, const unsigned char *D, offset_type
> N);
>> };
>> +
>> typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
>> InstrProfReaderIndex;
>>
>> @@ -267,8 +239,6 @@
>> std::unique_ptr<InstrProfReaderIndex> Index;
>> /// Iterator over the profile data.
>> InstrProfReaderIndex::data_iterator RecordIterator;
>> - /// Offset into our current data set.
>> - size_t CurrentOffset;
>> /// The file format version of the profile data.
>> uint64_t FormatVersion;
>> /// The maximal execution count among all functions.
>> @@ -278,7 +248,7 @@
>> IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) =
> delete;
>> public:
>> IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
>> - : DataBuffer(std::move(DataBuffer)), Index(nullptr),
> CurrentOffset(0) {}
>> + : DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
>>
>> /// Return true if the given buffer is in an indexed instrprof
> format.
>> static bool hasFormat(const MemoryBuffer &DataBuffer);
>> Index: lib/ProfileData/InstrProfReader.cpp
>> ===================================================================
>> --- lib/ProfileData/InstrProfReader.cpp
>> +++ lib/ProfileData/InstrProfReader.cpp
>> @@ -15,7 +15,6 @@
>> #include "llvm/ProfileData/InstrProfReader.h"
>> #include "InstrProfIndexed.h"
>> #include "llvm/ADT/STLExtras.h"
>> -#include "llvm/ProfileData/InstrProf.h"
>> #include <cassert>
>>
>> using namespace llvm;
>> @@ -126,18 +125,16 @@
>> return error(instrprof_error::malformed);
>>
>> // Read each counter and fill our internal storage with the values.
>> - Counts.clear();
>> - Counts.reserve(NumCounters);
>> + Record.Counts.clear();
>> + Record.Counts.reserve(NumCounters);
>> for (uint64_t I = 0; I < NumCounters; ++I) {
>> if (Line.is_at_end())
>> return error(instrprof_error::truncated);
>> uint64_t Count;
>> if ((Line++)->getAsInteger(10, Count))
>> return error(instrprof_error::malformed);
>> - Counts.push_back(Count);
>> + Record.Counts.push_back(Count);
>> }
>> - // Give the record a reference to our internal counter storage.
>> - Record.Counts = Counts;
>>
>> return success();
>> }
>> @@ -280,11 +277,10 @@
>> Record.Hash = swap(Data->FuncHash);
>> Record.Name = RawName;
>> if (ShouldSwapBytes) {
>> - Counts.clear();
>> - Counts.reserve(RawCounts.size());
>> + Record.Counts.clear();
>> + Record.Counts.reserve(RawCounts.size());
>> for (uint64_t Count : RawCounts)
>> - Counts.push_back(swap(Count));
>> - Record.Counts = Counts;
>> + Record.Counts.push_back(swap(Count));
>> } else
>> Record.Counts = RawCounts;
>>
>> @@ -303,6 +299,49 @@
>> return IndexedInstrProf::ComputeHash(HashType, K);
>> }
>>
>> +typedef InstrProfLookupTrait::data_type data_type;
>> +typedef InstrProfLookupTrait::offset_type offset_type;
>> +
>> +data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned
> char *D,
>> + offset_type N) {
>> +
>> + // Check if the data is corrupt. If so, don't try to read it.
>> + if (N % sizeof(uint64_t))
>> + return data_type();
>> +
>> + DataBuffer.clear();
>> + uint64_t NumCounts;
>> + uint64_t NumEntries = N / sizeof(uint64_t);
>> + std::vector<uint64_t> CounterBuffer;
>> + for (uint64_t I = 0; I < NumEntries; I += NumCounts) {
>> + using namespace support;
>> + // The function hash comes first.
>> + uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
>> +
>> + if (++I >= NumEntries)
>> + return data_type();
>> +
>> + // In v1, we have at least one count.
>> + // Later, we have the number of counts.
>> + NumCounts = (1 == FormatVersion)
>> + ? NumEntries - I
>> + : endian::readNext<uint64_t, little, unaligned>(D);
>> + if (1 != FormatVersion)
>> + ++I;
>> +
>> + // If we have more counts than data, this is bogus.
>> + if (I + NumCounts > NumEntries)
>> + return data_type();
>> +
>> + CounterBuffer.clear();
>> + for (unsigned J = 0; J < NumCounts; ++J)
>> + CounterBuffer.push_back(endian::readNext<uint64_t, little,
> unaligned>(D));
>> +
>> + DataBuffer.push_back(InstrProfRecord(K, Hash,
> std::move(CounterBuffer)));
>> + }
>> + return DataBuffer;
>> +}
>> +
>> bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer)
> {
>> if (DataBuffer.getBufferSize() < 8)
>> return false;
>> @@ -342,8 +381,9 @@
>> uint64_t HashOffset = endian::readNext<uint64_t, little,
> unaligned>(Cur);
>>
>> // The rest of the file is an on disk hash table.
>> - Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur,
> Start,
>> -
> InstrProfLookupTrait(HashType)));
>> + Index.reset(InstrProfReaderIndex::Create(
>> + Start + HashOffset, Cur, Start,
>> + InstrProfLookupTrait(HashType, FormatVersion)));
>> // Set up our iterator for readNextRecord.
>> RecordIterator = Index->data_begin();
>>
>> @@ -357,21 +397,14 @@
>> return error(instrprof_error::unknown_function);
>>
>> // Found it. Look for counters with the right hash.
>> - ArrayRef<uint64_t> Data = (*Iter).Data;
>> - uint64_t NumCounts;
>> - for (uint64_t I = 0, E = Data.size(); I != E; I += NumCounts) {
>> - // The function hash comes first.
>> - uint64_t FoundHash = Data[I++];
>> - // In v1, we have at least one count. Later, we have the number of
> counts.
>> - if (I == E)
>> - return error(instrprof_error::malformed);
>> - NumCounts = FormatVersion == 1 ? E - I : Data[I++];
>> - // If we have more counts than data, this is bogus.
>> - if (I + NumCounts > E)
>> - return error(instrprof_error::malformed);
>> + ArrayRef<InstrProfRecord> Data = (*Iter);
>> + if (Data.empty())
>> + return error(instrprof_error::malformed);
>> +
>> + for (unsigned I = 0, E = Data.size(); I < E; ++I) {
>> // Check for a match and fill the vector if there is one.
>> - if (FoundHash == FuncHash) {
>> - Counts = Data.slice(I, NumCounts);
>> + if (Data[I].Hash == FuncHash) {
>> + Counts = Data[I].Counts;
>> return success();
>> }
>> }
>> @@ -384,30 +417,15 @@
>> if (RecordIterator == Index->data_end())
>> return error(instrprof_error::eof);
>>
>> - // Record the current function name.
>> - Record.Name = (*RecordIterator).Name;
>> -
>> - ArrayRef<uint64_t> Data = (*RecordIterator).Data;
>> - // Valid data starts with a hash and either a count or the number of
> counts.
>> - if (CurrentOffset + 1 > Data.size())
>> - return error(instrprof_error::malformed);
>> - // First we have a function hash.
>> - Record.Hash = Data[CurrentOffset++];
>> - // In version 1 we knew the number of counters implicitly, but in
> newer
>> - // versions we store the number of counters next.
>> - uint64_t NumCounts =
>> - FormatVersion == 1 ? Data.size() - CurrentOffset :
> Data[CurrentOffset++];
>> - if (CurrentOffset + NumCounts > Data.size())
>> + if ((*RecordIterator).empty())
>> return error(instrprof_error::malformed);
>> - // And finally the counts themselves.
>> - Record.Counts = Data.slice(CurrentOffset, NumCounts);
>>
>> - // If we've exhausted this function's data, increment the record.
>> - CurrentOffset += NumCounts;
>> - if (CurrentOffset == Data.size()) {
>> + static unsigned RecordIndex = 0;
>> + ArrayRef<InstrProfRecord> Data = (*RecordIterator);
>> + Record = Data[RecordIndex++];
>> + if (RecordIndex >= Data.size()) {
>> ++RecordIterator;
>> - CurrentOffset = 0;
>> + RecordIndex = 0;
>> }
>> -
>> return success();
>> }
>> Index: unittests/ProfileData/InstrProfTest.cpp
>> ===================================================================
>> --- unittests/ProfileData/InstrProfTest.cpp
>> +++ unittests/ProfileData/InstrProfTest.cpp
>> @@ -68,6 +68,7 @@
>>
>> TEST_F(InstrProfTest, get_function_counts) {
>> Writer.addFunctionCounts("foo", 0x1234, {1, 2});
>> + Writer.addFunctionCounts("foo", 0x1235, {3, 4});
>> auto Profile = Writer.writeBuffer();
>> readProfile(std::move(Profile));
>>
>> @@ -77,6 +78,11 @@
>> ASSERT_EQ(1U, Counts[0]);
>> ASSERT_EQ(2U, Counts[1]);
>>
>> + ASSERT_TRUE(NoError(Reader->getFunctionCounts("foo", 0x1235,
> Counts)));
>> + ASSERT_EQ(2U, Counts.size());
>> + ASSERT_EQ(3U, Counts[0]);
>> + ASSERT_EQ(4U, Counts[1]);
>> +
>
> Is this new test related to this change, or is it part of something else
> that you added by accident? It seems unrelated to me, but maybe I'm
> missing something.
It was intentional. In the indexed readers the previous and current data
structures/code is put together to handle the case of same name different
hash values, having different set of counters. I've seen no tests for it
so I added here.
-Betul
>> std::error_code EC;
>> EC = Reader->getFunctionCounts("foo", 0x5678, Counts);
>> ASSERT_TRUE(ErrorEquals(instrprof_error::hash_mismatch, EC));
>
>
More information about the llvm-commits
mailing list