[PATCH] D10674: Value profiling - patchset 3

Justin Bogner via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 17 16:44:45 PDT 2015


Betul Buyukkurt <betulb at codeaurora.org> writes:
> betulb updated this revision to Diff 32113.
> betulb added a comment.
>
> - Reserved fields are removed from the instrprof_value_kind enum.
> - All strings are stored in the string table (InstrProfStringTable).
> - Review comments on the file format are addressed; the suggested
> indexed file format is implemented. The only difference from the
> suggested implementation is that the value kind is not written out
> explicitly in the profile data file.
> - Data structures are arranged such that the old value data is
> arranged into site records, and the InstrProfRecord is keeping a
> vector of site records.

Thanks for working on this. Comments inline.

>
> http://reviews.llvm.org/D10674
>
> Files:
>   include/llvm/ProfileData/InstrProf.h
>   include/llvm/ProfileData/InstrProfReader.h
>   include/llvm/ProfileData/InstrProfWriter.h
>   lib/ProfileData/InstrProf.cpp
>   lib/ProfileData/InstrProfIndexed.h
>   lib/ProfileData/InstrProfReader.cpp
>   lib/ProfileData/InstrProfWriter.cpp
>   tools/llvm-profdata/llvm-profdata.cpp
>   unittests/ProfileData/CoverageMappingTest.cpp
>   unittests/ProfileData/InstrProfTest.cpp
>
> Index: unittests/ProfileData/InstrProfTest.cpp
> ===================================================================
> --- unittests/ProfileData/InstrProfTest.cpp
> +++ unittests/ProfileData/InstrProfTest.cpp
> @@ -50,7 +50,8 @@
>  }
>  
>  TEST_F(InstrProfTest, write_and_read_one_function) {
> -  Writer.addFunctionCounts("foo", 0x1234, {1, 2, 3, 4});
> +  InstrProfRecord Record("foo", 0x1234, {1, 2, 3, 4});
> +  Writer.addRecord(Record);
>    auto Profile = Writer.writeBuffer();
>    readProfile(std::move(Profile));
>  
> @@ -67,13 +68,16 @@
>  }
>  
>  TEST_F(InstrProfTest, get_function_counts) {
> -  Writer.addFunctionCounts("foo", 0x1234, {1, 2});
> -  Writer.addFunctionCounts("foo", 0x1235, {3, 4});
> +  InstrProfRecord Record1("foo", 0x1234, {1, 2});
> +  InstrProfRecord Record2("foo", 0x1235, {3, 4});
> +  Writer.addRecord(Record1);
> +  Writer.addRecord(Record2);
>    auto Profile = Writer.writeBuffer();
>    readProfile(std::move(Profile));
>  
>    std::vector<uint64_t> Counts;
> -  ASSERT_TRUE(NoError(Reader->getFunctionCounts("foo", 0x1234, Counts)));
> +  ASSERT_TRUE(NoError(
> +    Reader->getFunctionCounts("foo", 0x1234, Counts)));
>    ASSERT_EQ(2U, Counts.size());
>    ASSERT_EQ(1U, Counts[0]);
>    ASSERT_EQ(2U, Counts[1]);
> @@ -92,9 +96,12 @@
>  }
>  
>  TEST_F(InstrProfTest, get_max_function_count) {
> -  Writer.addFunctionCounts("foo", 0x1234, {1ULL << 31, 2});
> -  Writer.addFunctionCounts("bar", 0, {1ULL << 63});
> -  Writer.addFunctionCounts("baz", 0x5678, {0, 0, 0, 0});
> +  InstrProfRecord Record1("foo", 0x1234, {1ULL << 31, 2});
> +  InstrProfRecord Record2("bar", 0, {1ULL << 63});
> +  InstrProfRecord Record3("baz", 0x5678, {0, 0, 0, 0});
> +  Writer.addRecord(Record1);
> +  Writer.addRecord(Record2);
> +  Writer.addRecord(Record3);
>    auto Profile = Writer.writeBuffer();
>    readProfile(std::move(Profile));
>  
> Index: unittests/ProfileData/CoverageMappingTest.cpp
> ===================================================================
> --- unittests/ProfileData/CoverageMappingTest.cpp
> +++ unittests/ProfileData/CoverageMappingTest.cpp
> @@ -188,7 +188,8 @@
>  }
>  
>  TEST_F(CoverageMappingTest, basic_coverage_iteration) {
> -  ProfileWriter.addFunctionCounts("func", 0x1234, {30, 20, 10, 0});
> +  InstrProfRecord Record("func", 0x1234, {30, 20, 10, 0});
> +  ProfileWriter.addRecord(Record);
>    readProfCounts();
>  
>    addCMR(Counter::getCounter(0), "file1", 1, 1, 9, 9);
> @@ -238,7 +239,8 @@
>  }
>  
>  TEST_F(CoverageMappingTest, combine_regions) {
> -  ProfileWriter.addFunctionCounts("func", 0x1234, {10, 20, 30});
> +  InstrProfRecord Record("func", 0x1234, {10, 20, 30});
> +  ProfileWriter.addRecord(Record);
>    readProfCounts();
>  
>    addCMR(Counter::getCounter(0), "file1", 1, 1, 9, 9);
> @@ -256,7 +258,8 @@
>  }
>  
>  TEST_F(CoverageMappingTest, dont_combine_expansions) {
> -  ProfileWriter.addFunctionCounts("func", 0x1234, {10, 20});
> +  InstrProfRecord Record("func", 0x1234, {10, 20});
> +  ProfileWriter.addRecord(Record);
>    readProfCounts();
>  
>    addCMR(Counter::getCounter(0), "file1", 1, 1, 9, 9);
> @@ -275,7 +278,8 @@
>  }
>  
>  TEST_F(CoverageMappingTest, strip_filename_prefix) {
> -  ProfileWriter.addFunctionCounts("file1:func", 0x1234, {10});
> +  InstrProfRecord Record("file1:func", 0x1234, {10});
> +  ProfileWriter.addRecord(Record);
>    readProfCounts();
>  
>    addCMR(Counter::getCounter(0), "file1", 1, 1, 9, 9);
> Index: tools/llvm-profdata/llvm-profdata.cpp
> ===================================================================
> --- tools/llvm-profdata/llvm-profdata.cpp
> +++ tools/llvm-profdata/llvm-profdata.cpp
> @@ -58,9 +58,8 @@
>        exitWithError(ec.message(), Filename);
>  
>      auto Reader = std::move(ReaderOrErr.get());
> -    for (const auto &I : *Reader)
> -      if (std::error_code EC =
> -              Writer.addFunctionCounts(I.Name, I.Hash, I.Counts))
> +    for (auto &I : *Reader)
> +      if (std::error_code EC = Writer.addRecord(I))
>          errs() << Filename << ": " << I.Name << ": " << EC.message() << "\n";
>      if (Reader->hasError())
>        exitWithError(Reader->getError().message(), Filename);
> @@ -134,8 +133,8 @@
>  }
>  
>  static int showInstrProfile(std::string Filename, bool ShowCounts,
> -                            bool ShowAllFunctions, std::string ShowFunction,
> -                            raw_fd_ostream &OS) {
> +                            bool ShowIndirectCallTargets, bool ShowAllFunctions,
> +                            std::string ShowFunction, raw_fd_ostream &OS) {
>    auto ReaderOrErr = InstrProfReader::create(Filename);
>    if (std::error_code EC = ReaderOrErr.getError())
>      exitWithError(EC.message(), Filename);
> @@ -146,7 +145,7 @@
>    for (const auto &Func : *Reader) {
>      bool Show =
>          ShowAllFunctions || (!ShowFunction.empty() &&
> -                             Func.Name.find(ShowFunction) != Func.Name.npos);
> +        StringRef(Func.Name).find(ShowFunction) != StringRef::npos);

What's this change for? Func.Name is already a StringRef.

>  
>      ++TotalFunctions;
>      assert(Func.Counts.size() > 0 && "function missing entry counter");
> @@ -162,6 +161,10 @@
>           << "    Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
>           << "    Counters: " << Func.Counts.size() << "\n"
>           << "    Function count: " << Func.Counts[0] << "\n";
> +      if (ShowIndirectCallTargets)
> +        OS << "    Indirect Call Site Count: "
> +           << Func.ValueSites[instrprof_value_kind::indirect_call_target].size()
> +           << "\n";
>      }
>  
>      if (Show && ShowCounts)
> @@ -174,6 +177,16 @@
>      }
>      if (Show && ShowCounts)
>        OS << "]\n";
> +
> +    if (Show && ShowIndirectCallTargets) {
> +      OS << "    Indirect Target Results: \n";
> +      for (size_t I = 0, E = Func.ValueSites[0].size(); I < E; ++I) {

Please don't use magic numbers like "Func.ValueSites[0]" - this should
say instrprof_value_kind::indirect_call_target if we're doing things
this way. That said, I don't think this is the best way to expose
this. More on that later.

> +        for (auto V : Func.ValueSites[0][I].ValueData) {
> +          OS << "\t[ " << I << ", ";
> +          OS << (const char*) V.first << ", " << V.second << " ]\n";
> +        }
> +      }
> +    }
>    }
>    if (Reader->hasError())
>      exitWithError(Reader->getError().message(), Filename);
> @@ -210,6 +223,8 @@
>  
>    cl::opt<bool> ShowCounts("counts", cl::init(false),
>                             cl::desc("Show counter values for shown functions"));
> +  cl::opt<bool> ShowIndirectCallTargets("ic-targets", cl::init(false),
> +      cl::desc("Show indirect call site target values for shown functions"));

This formatting doesn't look right. Please clang-format your patch.

>    cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
>                                   cl::desc("Details for every function"));
>    cl::opt<std::string> ShowFunction("function",
> @@ -238,8 +253,8 @@
>      errs() << "warning: -function argument ignored: showing all functions\n";
>  
>    if (ProfileKind == instr)
> -    return showInstrProfile(Filename, ShowCounts, ShowAllFunctions,
> -                            ShowFunction, OS);
> +    return showInstrProfile(Filename, ShowCounts, ShowIndirectCallTargets,
> +                            ShowAllFunctions, ShowFunction, OS);
>    else
>      return showSampleProfile(Filename, ShowCounts, ShowAllFunctions,
>                               ShowFunction, OS);
> Index: lib/ProfileData/InstrProfWriter.cpp
> ===================================================================
> --- lib/ProfileData/InstrProfWriter.cpp
> +++ lib/ProfileData/InstrProfWriter.cpp
> @@ -26,8 +26,8 @@
>    typedef StringRef key_type;
>    typedef StringRef key_type_ref;
>  
> -  typedef const InstrProfWriter::CounterData *const data_type;
> -  typedef const InstrProfWriter::CounterData *const data_type_ref;
> +  typedef const InstrProfWriter::ProfilingData *const data_type;
> +  typedef const InstrProfWriter::ProfilingData *const data_type_ref;
>  
>    typedef uint64_t hash_value_type;
>    typedef uint64_t offset_type;
> @@ -43,10 +43,27 @@
>  
>      offset_type N = K.size();
>      LE.write<offset_type>(N);
> +    offset_type M = sizeof(uint64_t); // size of ProfileData

This comment is confusing - I guess you meant "ProfilingData" here, and
you mean this space is for the number of elements in it, but that isn't
clear at all.

> +    for (const auto &ProfileData : *V) {
> +      M += sizeof(uint64_t); // size of ProfileData.first

Comments like "size of ProfileData.first" aren't helpful - please say
what the space we're reserving is actually for (ie, the function hash
here).

> +      M += sizeof(uint32_t); // size of ProfileData.second.Counts.size()

Why are you changing this count to a uint32 from a uint64? While it saves
some space, it means that the counts will always be stored unaligned
which can be quite bad for reading them.

> +      M += ProfileData.second.Counts.size() * sizeof(uint64_t);
> +      // Value data
> +      for (uint32_t Kind = instrprof_value_kind::first;
> +           Kind < instrprof_value_kind::size; ++Kind) {
> +        // Number of value sites per value kind
> +        M += sizeof(uint32_t); // ProfileData.second.ValuesSites[Kind].size()
> +        for (InstrProfValueSiteRecord I : ProfileData.second.ValueSites[Kind]) {
> +          // Number of value data pairs at a value site
> +          M += sizeof(uint32_t); // I.ValueData.size()

Similarly for these uint32s.

> +          for (auto V : I.ValueData) {
> +            M += sizeof(uint64_t);  // size of TargetValue
> +            M += sizeof(uint64_t);  // size of NumTaken
> +          }
> +        }
> +      }
> +    }
>  
> -    offset_type M = 0;
> -    for (const auto &Counts : *V)
> -      M += (2 + Counts.second.size()) * sizeof(uint64_t);
>      LE.write<offset_type>(M);
>  
>      return std::make_pair(N, M);
> @@ -61,49 +78,80 @@
>      using namespace llvm::support;
>      endian::Writer<little> LE(Out);
>  
> -    for (const auto &Counts : *V) {
> -      LE.write<uint64_t>(Counts.first);
> -      LE.write<uint64_t>(Counts.second.size());
> -      for (uint64_t I : Counts.second)
> +    LE.write<uint64_t>(V->size());

Not sure we need this size - can't we just derive this from the data
size in the reader?

> +    for (const auto &ProfileData : *V) {
> +      LE.write<uint64_t>(ProfileData.first);
> +      LE.write<uint32_t>(ProfileData.second.Counts.size());
> +      for (uint64_t I : ProfileData.second.Counts)
>          LE.write<uint64_t>(I);
> +      // Write value data
> +      for (uint32_t Kind = instrprof_value_kind::first;
> +           Kind < instrprof_value_kind::size; ++Kind) {
> +        // Write number of value sites per value kind
> +        LE.write<uint32_t>(ProfileData.second.ValueSites[Kind].size());
> +        for (InstrProfValueSiteRecord I : ProfileData.second.ValueSites[Kind]) {
> +          // Write number of value data pairs at this value site
> +          LE.write<uint32_t>(I.ValueData.size());
> +          for (auto V : I.ValueData) {
> +            if (Kind == instrprof_value_kind::indirect_call_target)
> +              LE.write<uint64_t>(ComputeHash((const char *)V.first));
> +            else LE.write<uint64_t>(V.first);

This looks wrong. Are you just relying on the fact that all of the
strings we use happen to already be in the hash table alongside our
function data? This will behave badly when there are hash collisions,
and will make it quite a bit harder if we want to explore shrinking the
format by avoiding storing function names if/when we don't need them.

I really do think we should store these as an actual string table.

> +            LE.write<uint64_t>(V.second);
> +          }
> +        }
> +      }
>      }
>    }
>  };
>  }
>  
> +static std::error_code combineInstrProfRecords(InstrProfRecord &Dest,
> +                                               InstrProfRecord &Source,
> +                                               uint64_t &MaxFunctionCount) {

I'm not sure we're gaining much by splitting this out from addRecord.

> +  // If the number of counters doesn't match we either have bad data
> +  // or a hash collision.
> +  if (Dest.Counts.size() != Source.Counts.size())
> +    return instrprof_error::count_mismatch;
> +
> +  for (size_t I = 0, E = Source.Counts.size(); I < E; ++I) {
> +    if (Dest.Counts[I] + Source.Counts[I] < Dest.Counts[I])
> +      return instrprof_error::counter_overflow;
> +    Dest.Counts[I] += Source.Counts[I];
> +  }
> +
> +  for (uint32_t Kind = instrprof_value_kind::first;
> +       Kind < instrprof_value_kind::size; ++Kind) {
> +    if (Dest.ValueSites[Kind].size() != Source.ValueSites[Kind].size())
> +      return instrprof_error::value_site_count_mismatch;
> +    for (size_t I = 0,  E = Source.ValueSites[Kind].size(); I < E; ++I)
> +      Dest.ValueSites[Kind][I].mergeValueData(Source.ValueSites[Kind][I]);
> +  }
> +
> +  // We keep track of the max function count as we go for simplicity.
> +  if (Dest.Counts[0] > MaxFunctionCount)
> +    MaxFunctionCount = Dest.Counts[0];
> +
> +  return instrprof_error::success;
> +}
> +
>  std::error_code
> -InstrProfWriter::addFunctionCounts(StringRef FunctionName,
> -                                   uint64_t FunctionHash,
> -                                   ArrayRef<uint64_t> Counters) {
> -  auto &CounterData = FunctionData[FunctionName];
> +InstrProfWriter::addRecord(InstrProfRecord &I) {
> +  I.updateStringTableReferences(StringTable);

It's kind of weird that this is a method on the InstrProfRecord - I
think the function that updates the string table fits better here in the
InstrProfWriter implementation.

> +  auto &ProfileDataMap = FunctionData[I.Name];
>  
> -  auto Where = CounterData.find(FunctionHash);
> -  if (Where == CounterData.end()) {
> +  auto Where = ProfileDataMap.find(I.Hash);
> +  if (Where == ProfileDataMap.end()) {
>      // We've never seen a function with this name and hash, add it.
> -    CounterData[FunctionHash] = Counters;
> +    ProfileDataMap[I.Hash] = I;
> +
>      // We keep track of the max function count as we go for simplicity.
> -    if (Counters[0] > MaxFunctionCount)
> -      MaxFunctionCount = Counters[0];
> +    if (I.Counts[0] > MaxFunctionCount)
> +      MaxFunctionCount = I.Counts[0];
>      return instrprof_error::success;
>    }
>  
>    // We're updating a function we've seen before.
> -  auto &FoundCounters = Where->second;
> -  // If the number of counters doesn't match we either have bad data or a hash
> -  // collision.
> -  if (FoundCounters.size() != Counters.size())
> -    return instrprof_error::count_mismatch;
> -
> -  for (size_t I = 0, E = Counters.size(); I < E; ++I) {
> -    if (FoundCounters[I] + Counters[I] < FoundCounters[I])
> -      return instrprof_error::counter_overflow;
> -    FoundCounters[I] += Counters[I];
> -  }
> -  // We keep track of the max function count as we go for simplicity.
> -  if (FoundCounters[0] > MaxFunctionCount)
> -    MaxFunctionCount = FoundCounters[0];
> -
> -  return instrprof_error::success;
> +  return combineInstrProfRecords(Where->second, I, MaxFunctionCount);
>  }
>  
>  std::pair<uint64_t, uint64_t> InstrProfWriter::writeImpl(raw_ostream &OS) {
> Index: lib/ProfileData/InstrProfReader.cpp
> ===================================================================
> --- lib/ProfileData/InstrProfReader.cpp
> +++ lib/ProfileData/InstrProfReader.cpp
> @@ -302,8 +302,9 @@
>  typedef InstrProfLookupTrait::data_type data_type;
>  typedef InstrProfLookupTrait::offset_type offset_type;
>  
> -data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
> -                                         offset_type N) {
> +data_type InstrProfLookupTrait::ReadDataV1V2(StringRef K,
> +                                             const unsigned char *D,
> +                                             offset_type N) {
>  
>    // Check if the data is corrupt. If so, don't try to read it.
>    if (N % sizeof(uint64_t))
> @@ -337,7 +338,71 @@
>      for (unsigned J = 0; J < NumCounts; ++J)
>        CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
>  
> +    DataBuffer.push_back(
> +        InstrProfRecord(K.data(), Hash, std::move(CounterBuffer)));
> +  }
> +  return DataBuffer;
> +}
> +
> +data_type InstrProfLookupTrait::ReadDataV3(StringRef K,
> +                                           const unsigned char *D,
> +                                           offset_type N) {

Other than the fact that we've changed NumCounts (or CountsSize as you
call it below) to a uint32 for whatever reason, I don't really see what
we're gaining by splitting this into ReadDataV1V2 and ReadDataV3 - Most
of the logic is identical, and we can just do "if (ver < V3) continue"
after the DataBuffer.push_back() line to share all of it, no?

> +  using namespace support;
> +  const unsigned char *End = D + N;
> +  // Read number of data entries.
> +  if (D + sizeof(uint64_t) > End)
> +    return data_type();
> +  uint32_t DataBufferSize = endian::readNext<uint64_t, little, unaligned>(D);
> +  DataBuffer.clear();
> +  DataBuffer.reserve(DataBufferSize);
> +  std::vector<uint64_t> CounterBuffer;
> +  for (uint32_t I = 0; I < DataBufferSize; ++I) {
> +    // Read hash and number of counters
> +    if (D + sizeof (uint64_t) + sizeof(uint32_t) >= End)
> +      return data_type();
> +    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
> +    uint32_t CountsSize = endian::readNext<uint32_t, little, unaligned>(D);
> +
> +    // Read counter values
> +    if (D + CountsSize * sizeof(uint64_t) >= End)
> +      return data_type();
> +
> +    CounterBuffer.clear();
> +    for (uint32_t C = 0; C < CountsSize; ++C)
> +      CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
> +
>      DataBuffer.push_back(InstrProfRecord(K, Hash, std::move(CounterBuffer)));
> +
> +    // Read value profiling data
> +    for (uint32_t Kind = instrprof_value_kind::first;
> +         Kind < instrprof_value_kind::size; ++Kind) {

I still think we should store the kind in the data here and then read
the following data based on that. There's no guarantee that all kinds
that aren't indirect_call_target will behave as they do in this loop as
is, so I'd rather just reject outright kinds we don't understand.

If the type is in the data we can do that, and it also saves a few bytes
for functions that don't have any value profiling data (and more bytes
when we support more kinds of value profiling).

This also has the benefit that the V2 data will be valid for V3, so we
might even be able to get away without adding branches to check the
version difference here, which would be a convenient simplification. I
haven't thought through if that actually works though.

> +      // Read number of value sites.
> +      if (D + sizeof(uint32_t) > End)
> +        return data_type();
> +      uint32_t ValueSiteCount = endian::readNext<uint32_t, little, unaligned>(D);
> +
> +      for (uint32_t VSite = 0; VSite < ValueSiteCount; ++VSite) {
> +        // Read number of value data pairs at value site.
> +        if (D + sizeof(uint32_t) > End)
> +          return data_type();
> +        uint32_t ValueDataCount = endian::readNext<uint32_t, little, unaligned>(D);
> +
> +        InstrProfValueSiteRecord VSiteRecord;
> +        for (uint32_t VCount = 0; VCount < ValueDataCount; ++VCount) {
> +          if (D + 2 * sizeof(uint64_t) > End)
> +            return data_type();
> +          uint64_t Value = endian::readNext<uint64_t, little, unaligned>(D);
> +          if (Kind == instrprof_value_kind::indirect_call_target) {
> +            auto Result = HashKeyMap.find(Value);
> +            if (Result != HashKeyMap.end())
> +              Value = (uint64_t)Result->second;

What if it isn't found? As I mentioned earlier, I'm not very comfortable
with this approach to storing the strings.

> +          }
> +          uint64_t NumTaken = endian::readNext<uint64_t, little, unaligned>(D);
> +          VSiteRecord.ValueData.push_back(std::make_pair(Value, NumTaken));
> +        }
> +        DataBuffer[DataBuffer.size()-1].ValueSites[Kind].push_back(VSiteRecord);

This is better written as DataBuffer.back(). It also might make sense to
sort these here rather than in mergeValueData - more on that later.

> +      }
> +    }
>    }
>    return DataBuffer;
>  }
> @@ -379,19 +444,24 @@
>    if (HashType > IndexedInstrProf::HashT::Last)
>      return error(instrprof_error::unsupported_hash_type);
>    uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
> -
>    // The rest of the file is an on disk hash table.
>    Index.reset(InstrProfReaderIndex::Create(
>        Start + HashOffset, Cur, Start,
> -      InstrProfLookupTrait(HashType, FormatVersion)));
> +      InstrProfLookupTrait(HashType, FormatVersion, HashKeyMap)));
> +  for (auto Key : Index->keys()) {
> +    const char* KeyTableRef = StringTable.insertString(Key);
> +    HashKeyMap.insert(std::make_pair(ComputeHash(HashType, Key), KeyTableRef));
> +  }
>    // Set up our iterator for readNextRecord.
>    RecordIterator = Index->data_begin();
>  
>    return success();
>  }
>  
>  std::error_code IndexedInstrProfReader::getFunctionCounts(
> -    StringRef FuncName, uint64_t FuncHash, std::vector<uint64_t> &Counts) {
> +  StringRef FuncName, uint64_t FuncHash,
> +  std::vector<uint64_t> &Counts) {
> +
>    auto Iter = Index->find(FuncName);
>    if (Iter == Index->end())
>      return error(instrprof_error::unknown_function);
> @@ -411,6 +481,29 @@
>    return error(instrprof_error::hash_mismatch);
>  }
>  
> +std::error_code IndexedInstrProfReader::getFunctionValuesForKind(
> +  StringRef FuncName, uint64_t FuncHash, uint32_t ValueKind,
> +  std::vector<InstrProfValueSiteRecord> &Values) {

Does this function work? Won't it return an index into a string table
with no means to turn that into the actual string for indirect call
profiling?

> +
> +  auto Iter = Index->find(FuncName);
> +  if (Iter == Index->end())
> +    return error(instrprof_error::unknown_function);
> +
> +  // Found it. Look for counters with the right hash.
> +  ArrayRef<InstrProfRecord> Data = (*Iter);
> +  if (Data.empty())
> +    return error(instrprof_error::malformed);
> +
> +  for (unsigned I = 0, E = Data.size(); I < E; ++I) {
> +    // Check for a match and fill the vector if there is one.
> +    if (Data[I].Hash == FuncHash) {
> +      Values = Data[I].ValueSites[ValueKind];
> +      return success();
> +    }
> +  }
> +  return error(instrprof_error::hash_mismatch);
> +}
> +
>  std::error_code
>  IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
>    // Are we out of records?
> Index: lib/ProfileData/InstrProfIndexed.h
> ===================================================================
> --- lib/ProfileData/InstrProfIndexed.h
> +++ lib/ProfileData/InstrProfIndexed.h
> @@ -47,7 +47,7 @@
>  }
>  
>  const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
> -const uint64_t Version = 2;
> +const uint64_t Version = 3;
>  const HashT HashType = HashT::MD5;
>  }
>  
> Index: lib/ProfileData/InstrProf.cpp
> ===================================================================
> --- lib/ProfileData/InstrProf.cpp
> +++ lib/ProfileData/InstrProf.cpp
> @@ -50,6 +50,8 @@
>        return "Function count mismatch";
>      case instrprof_error::counter_overflow:
>        return "Counter overflow";
> +    case instrprof_error::value_site_count_mismatch:
> +      return "Function's value site counts mismatch";
>      }
>      llvm_unreachable("A value of instrprof_error has no message.");
>    }
> Index: include/llvm/ProfileData/InstrProfWriter.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProfWriter.h
> +++ include/llvm/ProfileData/InstrProfWriter.h
> @@ -15,33 +15,30 @@
>  #ifndef LLVM_PROFILEDATA_INSTRPROFWRITER_H
>  #define LLVM_PROFILEDATA_INSTRPROFWRITER_H
>  
> -#include "llvm/ADT/ArrayRef.h"
>  #include "llvm/ADT/DenseMap.h"
>  #include "llvm/ADT/StringMap.h"
>  #include "llvm/ProfileData/InstrProf.h"
>  #include "llvm/Support/DataTypes.h"
>  #include "llvm/Support/MemoryBuffer.h"
>  #include "llvm/Support/raw_ostream.h"
> -#include <vector>
>  
>  namespace llvm {
>  
>  /// Writer for instrumentation based profile data.
>  class InstrProfWriter {
>  public:
> -  typedef SmallDenseMap<uint64_t, std::vector<uint64_t>, 1> CounterData;
> +  typedef SmallDenseMap<uint64_t, InstrProfRecord, 1> ProfilingData;
>  private:
> -  StringMap<CounterData> FunctionData;
> +  InstrProfStringTable StringTable;
> +  StringMap<ProfilingData> FunctionData;
>    uint64_t MaxFunctionCount;
>  public:
>    InstrProfWriter() : MaxFunctionCount(0) {}
>  
>    /// Add function counts for the given function. If there are already counts
>    /// for this function and the hash and number of counts match, each counter is
>    /// summed.
> -  std::error_code addFunctionCounts(StringRef FunctionName,
> -                                    uint64_t FunctionHash,
> -                                    ArrayRef<uint64_t> Counters);
> +  std::error_code addRecord(InstrProfRecord &I);

I'm not a fan of dropping const here, but I think it can come back if we
move the updateStringTable method anyway.

>    /// Write the profile to \c OS
>    void write(raw_fd_ostream &OS);
>    /// Write the profile, returning the raw data. For testing.
> Index: include/llvm/ProfileData/InstrProfReader.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProfReader.h
> +++ include/llvm/ProfileData/InstrProfReader.h
> @@ -24,6 +24,7 @@
>  #include "llvm/Support/MemoryBuffer.h"
>  #include "llvm/Support/OnDiskHashTable.h"
>  #include <iterator>
> +#include <map>
>  
>  namespace llvm {
>  
> @@ -145,7 +146,6 @@
>      const uint64_t CountersDelta;
>      const uint64_t NamesDelta;
>    };
> -
>    bool ShouldSwapBytes;
>    uint64_t CountersDelta;
>    uint64_t NamesDelta;
> @@ -195,10 +195,13 @@
>    std::vector<InstrProfRecord> DataBuffer;
>    IndexedInstrProf::HashT HashType;
>    unsigned FormatVersion;
> +  const std::map<uint64_t, const char*> &HashKeyMap;
>  
>  public:
> -  InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
> -      : HashType(HashType), FormatVersion(FormatVersion) {}
> +  InstrProfLookupTrait(IndexedInstrProf::HashT HashType,
> +      unsigned FormatVersion, std::map<uint64_t, const char*> &HashKeyMap)
> +      : HashType(HashType), FormatVersion(FormatVersion),
> +      HashKeyMap(HashKeyMap) {}
>  
>    typedef ArrayRef<InstrProfRecord> data_type;
>  
> @@ -209,6 +212,7 @@
>  
>    static bool EqualKey(StringRef A, StringRef B) { return A == B; }
>    static StringRef GetInternalKey(StringRef K) { return K; }
> +  static StringRef GetExternalKey(StringRef K) { return K; }
>  
>    hash_value_type ComputeHash(StringRef K);
>  
> @@ -224,7 +228,17 @@
>      return StringRef((const char *)D, N);
>    }
>  
> -  data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
> +  data_type ReadDataV1V2(StringRef K, const unsigned char *D, offset_type N);
> +  data_type ReadDataV3(StringRef K, const unsigned char *D, offset_type N);
> +
> +  data_type ReadData(StringRef K, const unsigned char *D, offset_type N) {
> +    switch (FormatVersion) {
> +      case 1:
> +      case 2: return ReadDataV1V2(K, D, N);
> +      case 3: return ReadDataV3(K, D, N);
> +    }
> +    return data_type();
> +  }
>  };
>  
>  typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
> @@ -243,6 +257,10 @@
>    uint64_t FormatVersion;
>    /// The maximal execution count among all functions.
>    uint64_t MaxFunctionCount;
> +  /// String table.
> +  InstrProfStringTable StringTable;
> +  /// Map of hash values to const char* keys in profiling data.
> +  std::map<uint64_t, const char*> HashKeyMap;
>  
>    IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
>    IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
> @@ -261,6 +279,13 @@
>    /// Fill Counts with the profile data for the given function name.
>    std::error_code getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
>                                      std::vector<uint64_t> &Counts);
> +
> +  /// Return value profile data for the given function name and hash and
> +  /// value profiling kind
> +  std::error_code getFunctionValuesForKind(StringRef FuncName,
> +      uint64_t FuncHash, uint32_t ValueKind,
> +      std::vector<InstrProfValueSiteRecord> &Values);
> +
>    /// Return the maximum of all known function counts.
>    uint64_t getMaximumFunctionCount() { return MaxFunctionCount; }
>  
> Index: include/llvm/ProfileData/InstrProf.h
> ===================================================================
> --- include/llvm/ProfileData/InstrProf.h
> +++ include/llvm/ProfileData/InstrProf.h
> @@ -16,10 +16,11 @@
>  #ifndef LLVM_PROFILEDATA_INSTRPROF_H_
>  #define LLVM_PROFILEDATA_INSTRPROF_H_
>  
> -#include "llvm/ADT/StringRef.h"
> +#include "llvm/ADT/StringSet.h"
>  #include <cstdint>
> -#include <system_error>
> +#include <list>
>  #include <vector>
> +#include <system_error>
>  
>  namespace llvm {
>  const std::error_category &instrprof_category();
> @@ -37,21 +38,99 @@
>      unknown_function,
>      hash_mismatch,
>      count_mismatch,
> -    counter_overflow
> +    counter_overflow,
> +    value_site_count_mismatch
>  };
>  
>  inline std::error_code make_error_code(instrprof_error E) {
>    return std::error_code(static_cast<int>(E), instrprof_category());
>  }
>  
> +enum instrprof_value_kind : uint32_t {
> +    first = 0,
> +    indirect_call_target = 0,
> +    size = 1
> +};
> +
> +struct InstrProfStringTable {
> +  // Set of string values in profiling data.
> +  StringSet<> StringValueSet;
> +  InstrProfStringTable() { StringValueSet.clear(); }
> +  // Get a pointer to internal storage of a string in set
> +  const char* getStringData(StringRef Str) {
> +    auto Result = StringValueSet.find(Str);
> +    return (Result == StringValueSet.end()) ? nullptr : Result->first().data();
> +  }
> +  // Insert a string to StringTable
> +  const char* insertString(StringRef Str) {
> +    auto Result = StringValueSet.insert(Str);
> +    return Result.first->first().data();
> +  }
> +};

Is this type giving us any value over just using the StringSet<>
directly?

> +
> +struct InstrProfValueSiteRecord {
> +  // Typedef for a single TargetValue-NumTaken pair.
> +  typedef std::pair<uint64_t, uint64_t> ValueDataPair;
> +  // Value profiling data pairs at a given value site.
> +  std::list<ValueDataPair> ValueData;
> +
> +  InstrProfValueSiteRecord() { ValueData.clear(); }
> +  InstrProfValueSiteRecord(const InstrProfValueSiteRecord &Rec)
> +      : ValueData(std::move(Rec.ValueData)) {}
> +
> +  // Sort ValueData ascending by TargetValue
> +  void sortByTargetValues() {
> +    ValueData.sort([](const ValueDataPair &left, const ValueDataPair &right)
> +                { return left.first < right.first; });

clang-format and follow LLVM convention for variable names please.

> +  }
> +  // Sort ValueData descending by NumTaken
> +  void sortByNumTaken() {
> +    ValueData.sort([](const ValueDataPair &left, const ValueDataPair &right)
> +                { return left.second > right.second; });
> +  }

This seems to be unused.

> +  // Merge data from another InstrProfValueSiteRecord
> +  void mergeValueData(InstrProfValueSiteRecord &Input) {
> +    this->sortByTargetValues();
> +    Input.sortByTargetValues();

It seems wasteful to re-sort these all the time. Would it make sense to
sort at read time, or possibly even canonicalize the on-disk order to
already be sorted?

I guess it might make sense for the on-disk order to be sorted by
num-taken if that's what the frontend will want, and since merging is
less common, the sort cost is okay. In any case, we should probably set
it up so that the on-disk format is always sorted in a particular way -
that will remove some non-determinism and we can use it to speed up
whichever operation we think is more important.

> +    auto I = ValueData.begin();
> +    auto J = Input.ValueData.begin();
> +    while (J != Input.ValueData.end()) {
> +      while (I != ValueData.end() && I->first < J->first)
> +        ++I;
> +      if (I != ValueData.end() && I->first == J->first) {
> +        I->second += J->second;
> +        ++I;
> +        ++J;
> +        continue;
> +      }
> +      ValueData.insert(I, *J);
> +      ++J;
> +    }

I suspect a for-loop would read better here. Also please store the
".end()" values in variables rather than re-evaluating.

> +  }
> +};
> +
>  /// Profiling information for a single function.
>  struct InstrProfRecord {
>    InstrProfRecord() {}
>    InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t> Counts)
>        : Name(Name), Hash(Hash), Counts(std::move(Counts)) {}
>    StringRef Name;
>    uint64_t Hash;
>    std::vector<uint64_t> Counts;
> +  // Size of vector indicates the number of value sites for a value kind
> +  std::vector<InstrProfValueSiteRecord> ValueSites[instrprof_value_kind::size];

I don't think we're gaining much by having this be an array - I was
thinking it would be more like

  std::vector<InstrProfValueSiteRecord> IndirectCalls;

Then when we add more value types, they can have their own variables and
be accessed directly. Most of the code that works with these will have a
particular kind in mind, and since the value data is dependent on kind
looping over these isn't generally that useful. That is, the looping we
have now is only in the reader and writer, and I can't see the users of
the data ever doing that.

For the reader and writer, a switch statement over the kinds will allow
us to warn if someone doesn't update somewhere when they add a new
kind. For the users of profile data, Data->IndirectCalls reads a lot
better than Data->ValueSites[instrprof_value_kind::indirect_call_target].
It really seems like the right trade-off to me.

> +  // Clear value data entries
> +  void clearValueData() {
> +    for (uint32_t Kind = instrprof_value_kind::first;
> +         Kind < instrprof_value_kind::size; ++Kind)
> +      ValueSites[Kind].clear();
> +  }

This is never called, it can be removed.

> +  void updateStringTableReferences(InstrProfStringTable &StrTable) {
> +    Name = StrTable.insertString(Name);
> +    for (auto VSite : ValueSites[instrprof_value_kind::indirect_call_target])
> +      for (auto VData : VSite.ValueData)
> +        VData.first = (uint64_t)StrTable.insertString((const char *)VData.first);
> +  }
>  };
>  
>  } // end namespace llvm


More information about the llvm-commits mailing list