[PATCH] D10674: Value profiling - patchset 3

Justin Bogner via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 23 15:15:31 PDT 2015


Betul Buyukkurt <betulb at codeaurora.org> writes:
> betulb updated this revision to Diff 35117.
> betulb added a comment.
>
> Review comments addressed:
>
> - Removed the ValuesSites[instrprof_value_kind] array and made
> individual value site vectors for different kinds. Currently only one
> value site vector is present due to single value kind is valid.
> - Added const std::vector<InstrProfValueSiteRecord>&
> getValueSitesForKind(uint32_t ValueKind) const; and
> std::vector<InstrProfValueSiteRecord>& getValueSitesForKind(uint32_t
> ValueKind); methods to retrieve the value site vector reference for a
> given kind.
> - Removed the HashKeyMap member from the indexed reader.

This is basically good now. There are a few really minor things left,
then I think this can go in.

>
> http://reviews.llvm.org/D10674
>
> Files:
>   include/llvm/ProfileData/InstrProf.h
>   include/llvm/ProfileData/InstrProfReader.h
>   include/llvm/ProfileData/InstrProfWriter.h
>   lib/ProfileData/InstrProf.cpp
>   lib/ProfileData/InstrProfIndexed.h
>   lib/ProfileData/InstrProfReader.cpp
>   lib/ProfileData/InstrProfWriter.cpp
>   tools/llvm-profdata/llvm-profdata.cpp
>   unittests/ProfileData/CoverageMappingTest.cpp
>   unittests/ProfileData/InstrProfTest.cpp
>
>
> Index: unittests/ProfileData/InstrProfTest.cpp
> ===================================================================
> --- unittests/ProfileData/InstrProfTest.cpp
> +++ unittests/ProfileData/InstrProfTest.cpp
> @@ -50,7 +50,8 @@
>  }
>  
>  TEST_F(InstrProfTest, write_and_read_one_function) {
> -  Writer.addFunctionCounts("foo", 0x1234, {1, 2, 3, 4});
> +  InstrProfRecord Record("foo", 0x1234, {1, 2, 3, 4});
> +  Writer.addRecord(std::move(Record));
>    auto Profile = Writer.writeBuffer();
>    readProfile(std::move(Profile));
>  
> @@ -67,8 +68,10 @@
>  }
>  
>  TEST_F(InstrProfTest, get_function_counts) {
> -  Writer.addFunctionCounts("foo", 0x1234, {1, 2});
> -  Writer.addFunctionCounts("foo", 0x1235, {3, 4});
> +  InstrProfRecord Record1("foo", 0x1234, {1, 2});
> +  InstrProfRecord Record2("foo", 0x1235, {3, 4});
> +  Writer.addRecord(std::move(Record1));
> +  Writer.addRecord(std::move(Record2));
>    auto Profile = Writer.writeBuffer();
>    readProfile(std::move(Profile));
>  
> @@ -92,9 +95,12 @@
>  }
>  
>  TEST_F(InstrProfTest, get_max_function_count) {
> -  Writer.addFunctionCounts("foo", 0x1234, {1ULL << 31, 2});
> -  Writer.addFunctionCounts("bar", 0, {1ULL << 63});
> -  Writer.addFunctionCounts("baz", 0x5678, {0, 0, 0, 0});
> +  InstrProfRecord Record1("foo", 0x1234, {1ULL << 31, 2});
> +  InstrProfRecord Record2("bar", 0, {1ULL << 63});
> +  InstrProfRecord Record3("baz", 0x5678, {0, 0, 0, 0});
> +  Writer.addRecord(std::move(Record1));
> +  Writer.addRecord(std::move(Record2));
> +  Writer.addRecord(std::move(Record3));
>    auto Profile = Writer.writeBuffer();
>    readProfile(std::move(Profile));
>  
> Index: unittests/ProfileData/CoverageMappingTest.cpp
> ===================================================================
> --- unittests/ProfileData/CoverageMappingTest.cpp
> +++ unittests/ProfileData/CoverageMappingTest.cpp
> @@ -188,7 +188,8 @@
>  }
>  
>  TEST_F(CoverageMappingTest, basic_coverage_iteration) {
> -  ProfileWriter.addFunctionCounts("func", 0x1234, {30, 20, 10, 0});
> +  InstrProfRecord Record("func", 0x1234, {30, 20, 10, 0});
> +  ProfileWriter.addRecord(std::move(Record));
>    readProfCounts();
>  
>    addCMR(Counter::getCounter(0), "file1", 1, 1, 9, 9);
> @@ -238,7 +239,8 @@
>  }
>  
>  TEST_F(CoverageMappingTest, combine_regions) {
> -  ProfileWriter.addFunctionCounts("func", 0x1234, {10, 20, 30});
> +  InstrProfRecord Record("func", 0x1234, {10, 20, 30});
> +  ProfileWriter.addRecord(std::move(Record));
>    readProfCounts();
>  
>    addCMR(Counter::getCounter(0), "file1", 1, 1, 9, 9);
> @@ -256,7 +258,8 @@
>  }
>  
>  TEST_F(CoverageMappingTest, dont_combine_expansions) {
> -  ProfileWriter.addFunctionCounts("func", 0x1234, {10, 20});
> +  InstrProfRecord Record("func", 0x1234, {10, 20});
> +  ProfileWriter.addRecord(std::move(Record));
>    readProfCounts();
>  
>    addCMR(Counter::getCounter(0), "file1", 1, 1, 9, 9);
> @@ -275,7 +278,8 @@
>  }
>  
>  TEST_F(CoverageMappingTest, strip_filename_prefix) {
> -  ProfileWriter.addFunctionCounts("file1:func", 0x1234, {10});
> +  InstrProfRecord Record("file1:func", 0x1234, {10});
> +  ProfileWriter.addRecord(std::move(Record));
>    readProfCounts();
>  
>    addCMR(Counter::getCounter(0), "file1", 1, 1, 9, 9);
> Index: tools/llvm-profdata/llvm-profdata.cpp
> ===================================================================
> --- tools/llvm-profdata/llvm-profdata.cpp
> +++ tools/llvm-profdata/llvm-profdata.cpp
> @@ -58,9 +58,8 @@
>        exitWithError(ec.message(), Filename);
>  
>      auto Reader = std::move(ReaderOrErr.get());
> -    for (const auto &I : *Reader)
> -      if (std::error_code EC =
> -              Writer.addFunctionCounts(I.Name, I.Hash, I.Counts))
> +    for (auto &I : *Reader)
> +      if (std::error_code EC = Writer.addRecord(std::move(I)))
>          errs() << Filename << ": " << I.Name << ": " << EC.message() << "\n";
>      if (Reader->hasError())
>        exitWithError(Reader->getError().message(), Filename);
> @@ -134,8 +133,8 @@
>  }
>  
>  static int showInstrProfile(std::string Filename, bool ShowCounts,
> -                            bool ShowAllFunctions, std::string ShowFunction,
> -                            raw_fd_ostream &OS) {
> +                            bool ShowIndirectCallTargets, bool ShowAllFunctions,
> +                            std::string ShowFunction, raw_fd_ostream &OS) {
>    auto ReaderOrErr = InstrProfReader::create(Filename);
>    if (std::error_code EC = ReaderOrErr.getError())
>      exitWithError(EC.message(), Filename);
> @@ -162,6 +161,10 @@
>           << "    Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
>           << "    Counters: " << Func.Counts.size() << "\n"
>           << "    Function count: " << Func.Counts[0] << "\n";
> +      if (ShowIndirectCallTargets)
> +        OS << "    Indirect Call Site Count: "
> +           << Func.IndirectCallSites.size()
> +           << "\n";
>      }
>  
>      if (Show && ShowCounts)
> @@ -174,6 +177,16 @@
>      }
>      if (Show && ShowCounts)
>        OS << "]\n";
> +
> +    if (Show && ShowIndirectCallTargets) {
> +      OS << "    Indirect Target Results: \n";
> +      for (size_t I = 0, E = Func.IndirectCallSites.size(); I < E; ++I) {
> +        for (auto V : Func.IndirectCallSites[I].ValueData) {
> +          OS << "\t[ " << I << ", ";
> +          OS << (const char *)V.first << ", " << V.second << " ]\n";
> +        }
> +      }
> +    }
>    }
>    if (Reader->hasError())
>      exitWithError(Reader->getError().message(), Filename);
> @@ -212,6 +225,8 @@
>  
>    cl::opt<bool> ShowCounts("counts", cl::init(false),
>                             cl::desc("Show counter values for shown functions"));
> +  cl::opt<bool> ShowIndirectCallTargets("ic-targets", cl::init(false),
> +      cl::desc("Show indirect call site target values for shown functions"));
>    cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
>                                   cl::desc("Details for every function"));
>    cl::opt<std::string> ShowFunction("function",
> @@ -240,8 +255,8 @@
>      errs() << "warning: -function argument ignored: showing all functions\n";
>  
>    if (ProfileKind == instr)
> -    return showInstrProfile(Filename, ShowCounts, ShowAllFunctions,
> -                            ShowFunction, OS);
> +    return showInstrProfile(Filename, ShowCounts, ShowIndirectCallTargets,
> +                            ShowAllFunctions, ShowFunction, OS);
>    else
>      return showSampleProfile(Filename, ShowCounts, ShowAllFunctions,
>                               ShowFunction, OS);
> Index: lib/ProfileData/InstrProfWriter.cpp
> ===================================================================
> --- lib/ProfileData/InstrProfWriter.cpp
> +++ lib/ProfileData/InstrProfWriter.cpp
> @@ -26,8 +26,8 @@
>    typedef StringRef key_type;
>    typedef StringRef key_type_ref;
>  
> -  typedef const InstrProfWriter::CounterData *const data_type;
> -  typedef const InstrProfWriter::CounterData *const data_type_ref;
> +  typedef const InstrProfWriter::ProfilingData *const data_type;
> +  typedef const InstrProfWriter::ProfilingData *const data_type_ref;
>  
>    typedef uint64_t hash_value_type;
>    typedef uint64_t offset_type;
> @@ -45,8 +45,27 @@
>      LE.write<offset_type>(N);
>  
>      offset_type M = 0;
> -    for (const auto &Counts : *V)
> -      M += (2 + Counts.second.size()) * sizeof(uint64_t);
> +    for (const auto &ProfileData : *V) {
> +      M += sizeof(uint64_t); // The function hash
> +      M += sizeof(uint64_t); // The size of the Counts vector
> +      M += ProfileData.second.Counts.size() * sizeof(uint64_t);
> +
> +      // Value data
> +      M += sizeof(uint64_t); // Number of value kinds with value sites.
> +      for (uint32_t Kind = instrprof_value_kind::first;
> +           Kind < instrprof_value_kind::size; ++Kind) {
> +        const std::vector<InstrProfValueSiteRecord> &ValueSites =
> +            ProfileData.second.getValueSitesForKind(Kind);
> +        if (ValueSites.empty())
> +          continue;
> +        M += sizeof(uint64_t); // Value kind
> +        M += sizeof(uint64_t); // The number of value sites for given value kind
> +        for (InstrProfValueSiteRecord I : ValueSites) {
> +          M += sizeof(uint64_t); // Number of value data pairs at a value site
> +          M += 2 * sizeof(uint64_t) * I.ValueData.size(); // Value data pairs
> +        }
> +      }
> +    }
>      LE.write<offset_type>(M);
>  
>      return std::make_pair(N, M);
> @@ -60,52 +79,119 @@
>                         offset_type) {
>      using namespace llvm::support;
>      endian::Writer<little> LE(Out);
> -
> -    for (const auto &Counts : *V) {
> -      LE.write<uint64_t>(Counts.first);
> -      LE.write<uint64_t>(Counts.second.size());
> -      for (uint64_t I : Counts.second)
> +    for (const auto &ProfileData : *V) {
> +      LE.write<uint64_t>(ProfileData.first); // Function hash
> +      LE.write<uint64_t>(ProfileData.second.Counts.size());
> +      for (uint64_t I : ProfileData.second.Counts)
>          LE.write<uint64_t>(I);
> +
> +      // Compute the number of value kinds with value sites.
> +      uint64_t NumValueKinds = 0;
> +      for (uint32_t Kind = instrprof_value_kind::first;
> +           Kind < instrprof_value_kind::size; ++Kind)
> +        NumValueKinds +=
> +            !(ProfileData.second.getValueSitesForKind(Kind).empty());
> +      LE.write<uint64_t>(NumValueKinds);
> +
> +      // Write value data
> +      for (uint32_t Kind = instrprof_value_kind::first;
> +           Kind < instrprof_value_kind::size; ++Kind) {
> +        const std::vector<InstrProfValueSiteRecord> &ValueSites =
> +            ProfileData.second.getValueSitesForKind(Kind);
> +        if (ValueSites.empty())
> +          continue;
> +        LE.write<uint64_t>(Kind); // Write value kind
> +        // Write number of value sites for current value kind
> +        LE.write<uint64_t>(ValueSites.size());
> +        for (InstrProfValueSiteRecord I : ValueSites) {
> +          // Write number of value data pairs at this value site
> +          LE.write<uint64_t>(I.ValueData.size());
> +          for (auto V : I.ValueData) {
> +            if (Kind == instrprof_value_kind::indirect_call_target)
> +              LE.write<uint64_t>(ComputeHash((const char *)V.first));
> +            else
> +              LE.write<uint64_t>(V.first);

We should switch over the kinds so that we get uncovered switch warnings
here. We'll need to replace the "size" value with a "last" value so that
this works and we don't need a case for "size", of course.

> +            LE.write<uint64_t>(V.second);
> +          }
> +        }
> +      }
>      }
>    }
>  };
>  }
>  
> -std::error_code
> -InstrProfWriter::addFunctionCounts(StringRef FunctionName,
> -                                   uint64_t FunctionHash,
> -                                   ArrayRef<uint64_t> Counters) {
> -  auto &CounterData = FunctionData[FunctionName];
> +static std::error_code combineInstrProfRecords(InstrProfRecord &Dest,
> +                                               InstrProfRecord &Source,
> +                                               uint64_t &MaxFunctionCount) {
> +  // If the number of counters doesn't match we either have bad data
> +  // or a hash collision.
> +  if (Dest.Counts.size() != Source.Counts.size())
> +    return instrprof_error::count_mismatch;
>  
> -  auto Where = CounterData.find(FunctionHash);
> -  if (Where == CounterData.end()) {
> -    // We've never seen a function with this name and hash, add it.
> -    CounterData[FunctionHash] = Counters;
> -    // We keep track of the max function count as we go for simplicity.
> -    if (Counters[0] > MaxFunctionCount)
> -      MaxFunctionCount = Counters[0];
> -    return instrprof_error::success;
> +  for (size_t I = 0, E = Source.Counts.size(); I < E; ++I) {
> +    if (Dest.Counts[I] + Source.Counts[I] < Dest.Counts[I])
> +      return instrprof_error::counter_overflow;
> +    Dest.Counts[I] += Source.Counts[I];
>    }
>  
> -  // We're updating a function we've seen before.
> -  auto &FoundCounters = Where->second;
> -  // If the number of counters doesn't match we either have bad data or a hash
> -  // collision.
> -  if (FoundCounters.size() != Counters.size())
> -    return instrprof_error::count_mismatch;
> +  for (uint32_t Kind = instrprof_value_kind::first;
> +       Kind < instrprof_value_kind::size; ++Kind) {
>  
> -  for (size_t I = 0, E = Counters.size(); I < E; ++I) {
> -    if (FoundCounters[I] + Counters[I] < FoundCounters[I])
> -      return instrprof_error::counter_overflow;
> -    FoundCounters[I] += Counters[I];
> +    std::vector<InstrProfValueSiteRecord> &SourceValueSites =
> +        Source.getValueSitesForKind(Kind);
> +    if (SourceValueSites.empty())
> +      continue;
> +
> +    std::vector<InstrProfValueSiteRecord> &DestValueSites =
> +        Dest.getValueSitesForKind(Kind);
> +
> +    if (DestValueSites.empty()) {
> +      DestValueSites.swap(SourceValueSites);
> +      continue;
> +    }
> +
> +    if (DestValueSites.size() != SourceValueSites.size())
> +      return instrprof_error::value_site_count_mismatch;
> +    for (size_t I = 0, E = SourceValueSites.size(); I < E; ++I)
> +      DestValueSites[I].mergeValueData(SourceValueSites[I]);
>    }
> +
>    // We keep track of the max function count as we go for simplicity.
> -  if (FoundCounters[0] > MaxFunctionCount)
> -    MaxFunctionCount = FoundCounters[0];
> +  if (Dest.Counts[0] > MaxFunctionCount)
> +    MaxFunctionCount = Dest.Counts[0];
>  
>    return instrprof_error::success;
>  }
>  
> +void InstrProfWriter::updateStringTableReferences(InstrProfRecord &I) {
> +  I.Name = StringTable.insertString(I.Name);
> +  std::vector<InstrProfValueSiteRecord> &IndirectCallSites =
> +      I.getValueSitesForKind(instrprof_value_kind::indirect_call_target);

This is just I.IndirectCallSites now.

> +  for (auto& VSite : IndirectCallSites)
> +    for (auto& VData : VSite.ValueData)
> +      VData.first =
> +          (uint64_t)StringTable.insertString((const char *)VData.first);

I still find it pretty questionable how we're casting back and forth
between uint64_t and char * here. I guess I can live with it though.

> +}
> +
> +std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I) {
> +  updateStringTableReferences(I);
> +  auto &ProfileDataMap = FunctionData[I.Name];
> +
> +  auto Where = ProfileDataMap.find(I.Hash);
> +  if (Where == ProfileDataMap.end()) {
> +    // We've never seen a function with this name and hash, add it.
> +    ProfileDataMap[I.Hash] = I;
> +
> +    // We keep track of the max function count as we go for simplicity.
> +    if (I.Counts[0] > MaxFunctionCount)
> +      MaxFunctionCount = I.Counts[0];
> +    return instrprof_error::success;
> +  }
> +
> +  // We're updating a function we've seen before.
> +  return combineInstrProfRecords(Where->second, I, MaxFunctionCount);
> +}
> +
>  std::pair<uint64_t, uint64_t> InstrProfWriter::writeImpl(raw_ostream &OS) {
>    OnDiskChainedHashTableGenerator<InstrProfRecordTrait> Generator;
>  
> Index: lib/ProfileData/InstrProfReader.cpp
> ===================================================================
> --- lib/ProfileData/InstrProfReader.cpp
> +++ lib/ProfileData/InstrProfReader.cpp
> @@ -15,8 +15,12 @@
>  #include "llvm/ProfileData/InstrProfReader.h"
>  #include "InstrProfIndexed.h"
>  #include "llvm/ADT/STLExtras.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Support/raw_ostream.h"
>  #include <cassert>
>  
> +#define DEBUG_TYPE "InstrProfReader"
> +
>  using namespace llvm;
>  
>  static ErrorOr<std::unique_ptr<MemoryBuffer>>
> @@ -302,42 +306,96 @@
>  typedef InstrProfLookupTrait::data_type data_type;
>  typedef InstrProfLookupTrait::offset_type offset_type;
>  
> +bool InstrProfLookupTrait::ReadValueProfilingData(
> +    const unsigned char *&D, const unsigned char *const End) {
> +
> +  using namespace support;
> +  // Read number of value kinds with value sites.
> +  if (D + sizeof(uint64_t) > End)
> +    return false;
> +  uint64_t ValueKindCount = endian::readNext<uint64_t, little, unaligned>(D);
> +
> +  for (uint32_t Kind = 0; Kind < ValueKindCount; ++Kind) {
> +
> +    // Read value kind and number of value sites for kind.
> +    if (D + 2*sizeof(uint64_t) > End)
> +      return false;
> +    uint64_t ValueKind = endian::readNext<uint64_t, little, unaligned>(D);
> +    uint64_t ValueSiteCount = endian::readNext<uint64_t, little, unaligned>(D);
> +
> +    std::vector<InstrProfValueSiteRecord> &ValueSites =
> +        DataBuffer.back().getValueSitesForKind(ValueKind);
> +    ValueSites.reserve(ValueSiteCount);
> +    for (uint64_t VSite = 0; VSite < ValueSiteCount; ++VSite) {
> +      // Read number of value data pairs at value site.
> +      if (D + sizeof(uint64_t) > End)
> +        return false;
> +      uint64_t ValueDataCount =
> +          endian::readNext<uint64_t, little, unaligned>(D);
> +
> +      // Check if there are as many ValueDataPairs as ValueDataCount in memory.
> +      if (D + (ValueDataCount<<1)*sizeof(uint64_t) > End)

There's still some funny formatting. Please run clang-format-patch or
git-clang-format on your changes.

> +        return false;
> +
> +      InstrProfValueSiteRecord VSiteRecord;
> +      for (uint64_t VCount = 0; VCount < ValueDataCount; ++VCount) {
> +        uint64_t Value = endian::readNext<uint64_t, little, unaligned>(D);
> +        uint64_t NumTaken = endian::readNext<uint64_t, little, unaligned>(D);
> +        if (ValueKind == instrprof_value_kind::indirect_call_target) {
> +          auto Result = HashKeyMap.find(Value);
> +          assert(Result != HashKeyMap.end() &&
> +                 "Hash does not match any known keys\n");
> +          Value = (uint64_t)Result->second;
> +        }
> +        VSiteRecord.ValueData.push_back(std::make_pair(Value, NumTaken));
> +      }
> +      ValueSites.push_back(std::move(VSiteRecord));
> +    }
> +  }
> +  return true;
> +}
> +
>  data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
>                                           offset_type N) {
> -
>    // Check if the data is corrupt. If so, don't try to read it.
>    if (N % sizeof(uint64_t))
>      return data_type();
>  
>    DataBuffer.clear();
> -  uint64_t NumCounts;
> -  uint64_t NumEntries = N / sizeof(uint64_t);
>    std::vector<uint64_t> CounterBuffer;
> -  for (uint64_t I = 0; I < NumEntries; I += NumCounts) {
> -    using namespace support;
> -    // The function hash comes first.
> -    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
>  
> -    if (++I >= NumEntries)
> +  using namespace support;
> +  const unsigned char *End = D + N;
> +  while (D < End) {
> +    // Read hash
> +    if (D + sizeof(uint64_t) >= End)
>        return data_type();
> +    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
>  
> -    // In v1, we have at least one count.
> -    // Later, we have the number of counts.
> -    NumCounts = (1 == FormatVersion)
> -                    ? NumEntries - I
> -                    : endian::readNext<uint64_t, little, unaligned>(D);
> -    if (1 != FormatVersion)
> -      ++I;
> -
> -    // If we have more counts than data, this is bogus.
> -    if (I + NumCounts > NumEntries)
> +    // Initialize number of counters for FormatVersion == 1
> +    uint64_t CountsSize = N / sizeof(uint64_t) - 1;
> +    // If format version is different then read number of counters
> +    if (FormatVersion != 1) {
> +      if (D + sizeof(uint64_t) > End)
> +        return data_type();
> +      CountsSize = endian::readNext<uint64_t, little, unaligned>(D);
> +    }
> +    // Read counter values
> +    if (D + CountsSize * sizeof(uint64_t) > End)
>        return data_type();
>  
>      CounterBuffer.clear();
> -    for (unsigned J = 0; J < NumCounts; ++J)
> +    CounterBuffer.reserve(CountsSize);
> +    for (uint64_t J = 0; J < CountsSize; ++J)
>        CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
>  
>      DataBuffer.push_back(InstrProfRecord(K, Hash, std::move(CounterBuffer)));
> +
> +    // Read value profiling data
> +    if (FormatVersion > 2 && !ReadValueProfilingData(D, End)) {
> +      DataBuffer.clear();
> +      return data_type();
> +    }
>    }
>    return DataBuffer;
>  }
> @@ -384,6 +442,23 @@
>    Index.reset(InstrProfReaderIndex::Create(
>        Start + HashOffset, Cur, Start,
>        InstrProfLookupTrait(HashType, FormatVersion)));
> +
> +  // Form the map of hash values to const char* keys in profiling data.
> +  std::map<uint64_t, const char *> HashKeyMap;
> +  for (auto Key : Index->keys()) {
> +    const char *KeyTableRef = StringTable.insertString(Key);
> +    auto Result = HashKeyMap.insert(std::make_pair(ComputeHash(HashType, Key),
> +                                                   KeyTableRef));
> +    // Emit warning if a hash collision is detected.
> +    if (Result.second == false)
> +      DEBUG(dbgs() << "IndexedInstrProfReader: hash collision detected: \n"
> +                   << "\t Map Entry(Hash, Key): " << Result.first->first
> +                   << ", " << Result.first->second << "\n"
> +                   << "\t New Entry(Hash, Key): " << ComputeHash(HashType, Key)
> +                   << ", " << Key << "\n");
> +  }
> +  // Set the hash key map for the InstrLookupTrait
> +  Index->getInfoObj().setHashKeyMap(std::move(HashKeyMap));

I still think a std::vector is a more appropriate type than a std::map
here. You can just std::sort() it before passing it to setHashKeyMap and
use std::lower_bound for O(log(N)) look ups later. This is minor though.

>    // Set up our iterator for readNextRecord.
>    RecordIterator = Index->data_begin();
>  
> Index: lib/ProfileData/InstrProfIndexed.h
> ===================================================================
> --- lib/ProfileData/InstrProfIndexed.h
> +++ lib/ProfileData/InstrProfIndexed.h
> @@ -47,7 +47,7 @@
>  }
>  
>  const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
> -const uint64_t Version = 2;
> +const uint64_t Version = 3;
>  const HashT HashType = HashT::MD5;
>  }
>  
> Index: lib/ProfileData/InstrProf.cpp
> ===================================================================
> --- lib/ProfileData/InstrProf.cpp
> +++ lib/ProfileData/InstrProf.cpp
> @@ -50,6 +50,8 @@
>        return "Function count mismatch";
>      case instrprof_error::counter_overflow:
>        return "Counter overflow";
> +    case instrprof_error::value_site_count_mismatch:
> +      return "Function's value site counts mismatch";
>      }
>      llvm_unreachable("A value of instrprof_error has no message.");
>    }
> Index: include/llvm/ProfileData/InstrProfWriter.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProfWriter.h
> +++ include/llvm/ProfileData/InstrProfWriter.h
> @@ -15,33 +15,32 @@
>  #ifndef LLVM_PROFILEDATA_INSTRPROFWRITER_H
>  #define LLVM_PROFILEDATA_INSTRPROFWRITER_H
>  
> -#include "llvm/ADT/ArrayRef.h"
>  #include "llvm/ADT/DenseMap.h"
> -#include "llvm/ADT/StringMap.h"
>  #include "llvm/ProfileData/InstrProf.h"
>  #include "llvm/Support/DataTypes.h"
>  #include "llvm/Support/MemoryBuffer.h"
>  #include "llvm/Support/raw_ostream.h"
> -#include <vector>
>  
>  namespace llvm {
>  
>  /// Writer for instrumentation based profile data.
>  class InstrProfWriter {
>  public:
> -  typedef SmallDenseMap<uint64_t, std::vector<uint64_t>, 1> CounterData;
> +  typedef SmallDenseMap<uint64_t, InstrProfRecord, 1> ProfilingData;
> +
>  private:
> -  StringMap<CounterData> FunctionData;
> +  InstrProfStringTable StringTable;
> +  StringMap<ProfilingData> FunctionData;
>    uint64_t MaxFunctionCount;
>  public:
>    InstrProfWriter() : MaxFunctionCount(0) {}
>  
> +  /// Update string entries in profile data with references to StringTable.
> +  void updateStringTableReferences(InstrProfRecord &I);
>    /// Add function counts for the given function. If there are already counts
>    /// for this function and the hash and number of counts match, each counter is
>    /// summed.
> -  std::error_code addFunctionCounts(StringRef FunctionName,
> -                                    uint64_t FunctionHash,
> -                                    ArrayRef<uint64_t> Counters);
> +  std::error_code addRecord(InstrProfRecord &&I);
>    /// Write the profile to \c OS
>    void write(raw_fd_ostream &OS);
>    /// Write the profile, returning the raw data. For testing.
> Index: include/llvm/ProfileData/InstrProfReader.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProfReader.h
> +++ include/llvm/ProfileData/InstrProfReader.h
> @@ -24,6 +24,7 @@
>  #include "llvm/Support/MemoryBuffer.h"
>  #include "llvm/Support/OnDiskHashTable.h"
>  #include <iterator>
> +#include <map>
>  
>  namespace llvm {
>  
> @@ -65,6 +66,9 @@
>    InstrProfIterator end() { return InstrProfIterator(); }
>  
>  protected:
> +  /// String table for holding a unique copy of all the strings in the profile.
> +  InstrProfStringTable StringTable;
> +
>    /// Set the current std::error_code and return same.
>    std::error_code error(std::error_code EC) {
>      LastError = EC;
> @@ -195,6 +199,7 @@
>    std::vector<InstrProfRecord> DataBuffer;
>    IndexedInstrProf::HashT HashType;
>    unsigned FormatVersion;
> +  std::map<uint64_t, const char *> HashKeyMap;
>  
>  public:
>    InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
> @@ -209,9 +214,13 @@
>  
>    static bool EqualKey(StringRef A, StringRef B) { return A == B; }
>    static StringRef GetInternalKey(StringRef K) { return K; }
> +  static StringRef GetExternalKey(StringRef K) { return K; }
>  
>    hash_value_type ComputeHash(StringRef K);
>  
> +  void setHashKeyMap(std::map<uint64_t, const char *> HashKeyMap) {
> +    this->HashKeyMap = std::move(HashKeyMap);
> +  }
>    static std::pair<offset_type, offset_type>
>    ReadKeyDataLength(const unsigned char *&D) {
>      using namespace support;
> @@ -224,6 +233,8 @@
>      return StringRef((const char *)D, N);
>    }
>  
> +  bool ReadValueProfilingData(const unsigned char *&D,
> +                              const unsigned char *const End);
>    data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
>  };
>  
> Index: include/llvm/ProfileData/InstrProf.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProf.h
> +++ include/llvm/ProfileData/InstrProf.h
> @@ -16,42 +16,119 @@
>  #ifndef LLVM_PROFILEDATA_INSTRPROF_H_
>  #define LLVM_PROFILEDATA_INSTRPROF_H_
>  
> -#include "llvm/ADT/StringRef.h"
> +#include "llvm/ADT/StringSet.h"
> +#include "llvm/Support/ErrorHandling.h"
>  #include <cstdint>
> +#include <list>
>  #include <system_error>
>  #include <vector>
>  
>  namespace llvm {
>  const std::error_category &instrprof_category();
>  
>  enum class instrprof_error {
> -    success = 0,
> -    eof,
> -    bad_magic,
> -    bad_header,
> -    unsupported_version,
> -    unsupported_hash_type,
> -    too_large,
> -    truncated,
> -    malformed,
> -    unknown_function,
> -    hash_mismatch,
> -    count_mismatch,
> -    counter_overflow
> +  success = 0,
> +  eof,
> +  bad_magic,
> +  bad_header,
> +  unsupported_version,
> +  unsupported_hash_type,
> +  too_large,
> +  truncated,
> +  malformed,
> +  unknown_function,
> +  hash_mismatch,
> +  count_mismatch,
> +  counter_overflow,
> +  value_site_count_mismatch
>  };
>  
>  inline std::error_code make_error_code(instrprof_error E) {
>    return std::error_code(static_cast<int>(E), instrprof_category());
>  }
>  
> +enum instrprof_value_kind : uint32_t {
> +  first = 0,
> +  indirect_call_target = 0,
> +  size = 1

The enum value names are hoisted into the enclosing scope, so names like
first and size are going cause problems. These names need to be unique.
We should probably use CamelCase names too, to match llvm style. ie:

  enum InstrProfValueKind : uint32_t {
    IPVK_IndirectCallTarget = 0,

    IPVK_First = IPVK_IndirectCallTarget,
    IPVK_Last = IPVK_IndirectCallTarget
  }

> +};
> +
> +struct InstrProfStringTable {
> +  // Set of string values in profiling data.
> +  StringSet<> StringValueSet;
> +  InstrProfStringTable() { StringValueSet.clear(); }
> +  // Get a pointer to internal storage of a string in set
> +  const char *getStringData(StringRef Str) {
> +    auto Result = StringValueSet.find(Str);
> +    return (Result == StringValueSet.end()) ? nullptr : Result->first().data();
> +  }
> +  // Insert a string to StringTable
> +  const char *insertString(StringRef Str) {
> +    auto Result = StringValueSet.insert(Str);
> +    return Result.first->first().data();
> +  }
> +};
> +
> +struct InstrProfValueSiteRecord {
> +  // Typedef for a single TargetValue-NumTaken pair.
> +  typedef std::pair<uint64_t, uint64_t> ValueDataPair;
> +  // Value profiling data pairs at a given value site.
> +  std::list<ValueDataPair> ValueData;
> +
> +  InstrProfValueSiteRecord() { ValueData.clear(); }
> +
> +  // Sort ValueData ascending by TargetValue
> +  void sortByTargetValues() {
> +    ValueData.sort([](const ValueDataPair &left, const ValueDataPair &right) {
> +      return left.first < right.first;
> +    });
> +  }
> +  // Merge data from another InstrProfValueSiteRecord
> +  void mergeValueData(InstrProfValueSiteRecord &Input) {
> +    this->sortByTargetValues();
> +    Input.sortByTargetValues();
> +    auto I = ValueData.begin();
> +    auto IE = ValueData.end();
> +    for (auto J = Input.ValueData.begin(), JE = Input.ValueData.end(); J != JE;
> +         ++J) {
> +      while (I != IE && I->first < J->first)
> +        ++I;
> +      if (I != IE && I->first == J->first) {
> +        I->second += J->second;
> +        ++I;
> +        continue;
> +      }
> +      ValueData.insert(I, *J);
> +    }
> +  }
> +};
> +
>  /// Profiling information for a single function.
>  struct InstrProfRecord {
>    InstrProfRecord() {}
>    InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t> Counts)
>        : Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
>    StringRef Name;
>    uint64_t Hash;
>    std::vector<uint64_t> Counts;
> +  std::vector<InstrProfValueSiteRecord> IndirectCallSites;
> +
> +  const std::vector<InstrProfValueSiteRecord>& getValueSitesForKind(
> +      uint32_t ValueKind) const {
> +    switch(ValueKind) {
> +      case instrprof_value_kind::indirect_call_target:
> +        return IndirectCallSites;
> +      default: llvm_unreachable("Unknown value kind!");

Get rid of the "default:" so we get covered switch warnings, and put the
unreachable after the switch statement.

> +    }
> +  }
> +
> +  std::vector<InstrProfValueSiteRecord>& getValueSitesForKind(
> +      uint32_t ValueKind) {
> +    const InstrProfRecord* const_this =
> +        static_cast<const InstrProfRecord*>(this);

const_cast is probably more appropriate than static.

> +    return const_cast<std::vector<InstrProfValueSiteRecord>&>
> +        (const_this->getValueSitesForKind(ValueKind));
> +  }
>  };
>  
>  } // end namespace llvm


More information about the llvm-commits mailing list