[PATCH] LLVM changes for indirect call target profiling support

Tue Jun 2 11:20:48 PDT 2015

> Betul Buyukkurt <betulb at codeaurora.org> writes:
>> New fields are reserved in enum instr_value_prof_kind : uint32_t { ...
} for future value profiling types.
>> http://reviews.llvm.org/D8908
>> Files:
>>   docs/LangRef.rst
>>   include/llvm/IR/IntrinsicInst.h
>>   include/llvm/IR/Intrinsics.td
>>   include/llvm/ProfileData/InstrProf.h
>>   include/llvm/ProfileData/InstrProfReader.h
>>   include/llvm/ProfileData/InstrProfRecord.h
>>   include/llvm/ProfileData/InstrProfWriter.h
>>   lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
>>   lib/ProfileData/InstrProfIndexed.h
>>   lib/ProfileData/InstrProfReader.cpp
>>   lib/ProfileData/InstrProfWriter.cpp
>>   lib/Transforms/Instrumentation/InstrProfiling.cpp
>>   test/Instrumentation/InstrProfiling/linkage.ll
>>   test/Instrumentation/InstrProfiling/platform.ll
>>   test/Instrumentation/InstrProfiling/profiling.ll
>>   tools/llvm-profdata/llvm-profdata.cpp
>>   unittests/ProfileData/CoverageMappingTest.cpp
>>   unittests/ProfileData/InstrProfTest.cpp
>> EMAIL PREFERENCES
>>   http://reviews.llvm.org/settings/panel/emailpreferences/
>> Index: docs/LangRef.rst
>> =================================================================== ---
docs/LangRef.rst
>> +++ docs/LangRef.rst
>> @@ -8064,6 +8064,53 @@
>>  format that can be written out by a compiler runtime and consumed via
the ``llvm-profdata`` tool.
>> +'``llvm.instrprof_instrument_target``' Intrinsic
>> +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>> +
>> +Syntax:
>> +"""""""
>> +
>> +::
>> +
>> +      declare void @llvm.instrprof_instrument_target(i8* <name>, i64
<hash>,
>> +                                                     i32 <value_kind>,
+                                                     i64 <value>, +
                                                 i32 <index>) +
>> +Overview:
>> +"""""""""
>> +
>> +The '``llvm.instrprof_instrument_target``' intrinsic can be emitted by
a
>> +frontend for use with instrumentation based profiling. This will be
+lowered by the ``-instrprof`` pass to find out the target values,
+instrumented expressions take in a program at runtime.
>> +
>> +Arguments:
>> +""""""""""
>> +
>> +The first argument is a pointer to a global variable containing the
+name of the entity being instrumented. ``name`` should generally be the
>> +(mangled) function name for a set of counters.
>> +
>> +The second argument is a hash value that can be used by the consumer
+of the profile data to detect changes to the instrumented source. It +is
an error if ``hash`` differs between two instances of
>> +``instrprof_instrument_target`` that refer to the same name.
>> +
>> +The third argument represents the kind of the value profiling that is
done.
>> +The fourth argument is the value of the expression being profiled. The
last
>> +argument is the index of the instrumented expression within ``name``.
+It should be >= 0.
> These arguments need further explanation.

Not addressed in the latest patch.

>> +
>> +Semantics:
>> +""""""""""
>> +
>> +This intrinsic represents the point where a call to a runtime routine
+should be inserted for profiling of values at target expressions. It will
>> +cause the ``-instrprof`` pass to generate the appropriate data
+structures and the call to the profile runtime library with proper
+arguments.
>> +
>>  Standard C Library Intrinsics
>>  -----------------------------
>> Index: include/llvm/IR/IntrinsicInst.h
>> =================================================================== ---
include/llvm/IR/IntrinsicInst.h
>> +++ include/llvm/IR/IntrinsicInst.h
>> @@ -372,6 +372,38 @@
>>        return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
>>      }
>>    };
>> +
>> +  /// This represents the llvm.instrprof_instrument_target intrinsic.
+  class InstrProfInstrumentTargetInst : public IntrinsicInst { +  public:
>> +    static inline bool classof(const IntrinsicInst *I) {
>> +      return I->getIntrinsicID() ==
>> Intrinsic::instrprof_instrument_target;
>> +    }
>> +    static inline bool classof(const Value *V) {
>> +      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
>> +
>> +    GlobalVariable *getName() const {
>> +      return cast<GlobalVariable>(
>> +          const_cast<Value *>(getArgOperand(0))->stripPointerCasts());
+    }
>> +
>> +    ConstantInt *getHash() const {
>> +      return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+    }
>> +
>> +    ConstantInt *getValueKind() const {
>> +      return cast<ConstantInt>(const_cast<Value *>(getArgOperand(2)));
+    }
>> +
>> +    Value *getTargetValue() const {
>> +      return cast<Value>(const_cast<Value *>(getArgOperand(3))); +
}
>> +
>> +    ConstantInt *getIndex() const {
>> +      return cast<ConstantInt>(const_cast<Value *>(getArgOperand(4)));
+    }
>> +  };
>>  }
>>  #endif
>> Index: include/llvm/IR/Intrinsics.td
>> =================================================================== ---
include/llvm/IR/Intrinsics.td
>> +++ include/llvm/IR/Intrinsics.td
>> @@ -304,6 +304,13 @@
>>                                           llvm_i32_ty, llvm_i32_ty],
>>                                          []>;
>> +// A call to profile runtime for value profiling of target expressions
+// through instrumentation based profiling.
>> +def int_instrprof_instrument_target : Intrinsic<[],
>> +                                        [llvm_ptr_ty, llvm_i64_ty,
llvm_i32_ty,
>> +                                         llvm_i64_ty, llvm_i32_ty], +
                                      []>;
>> +
>>  //===------------------- Standard C Library Intrinsics
>> --------------------===//
>>  //
>> Index: include/llvm/ProfileData/InstrProf.h
>> =================================================================== ---
include/llvm/ProfileData/InstrProf.h
>> +++ include/llvm/ProfileData/InstrProf.h
>> @@ -16,9 +16,19 @@
>>  #ifndef LLVM_PROFILEDATA_INSTRPROF_H_
>>  #define LLVM_PROFILEDATA_INSTRPROF_H_
>> +#include <cstdint>
>>  #include <system_error>
>>  namespace llvm {
>> +
>> +enum instr_value_prof_kind : uint32_t {
>> +    first = 0,
>> +    indirect_call_target = 0,
>> +    reserved_1 = 1,
>> +    reserved_2 = 2,
>> +    last = 3
>> +};
>> +
>>  const std::error_category &instrprof_category();
>>  enum class instrprof_error {
>> Index: include/llvm/ProfileData/InstrProfReader.h
>> =================================================================== ---
include/llvm/ProfileData/InstrProfReader.h
>> +++ include/llvm/ProfileData/InstrProfReader.h
>> @@ -17,28 +17,21 @@
>>  #include "llvm/ADT/ArrayRef.h"
>>  #include "llvm/ADT/StringExtras.h"
>> -#include "llvm/ProfileData/InstrProf.h"
>> +#include "llvm/ProfileData/InstrProfRecord.h"
>>  #include "llvm/Support/EndianStream.h"
>>  #include "llvm/Support/ErrorOr.h"
>> +#include "llvm/Support/raw_ostream.h"
>>  #include "llvm/Support/LineIterator.h"
>>  #include "llvm/Support/MemoryBuffer.h"
>>  #include "llvm/Support/OnDiskHashTable.h"
>>  #include <iterator>
>> +#include <map>
>> +#include <string>
>>  namespace llvm {
>>  class InstrProfReader;
>> -/// Profiling information for a single function.
>> -struct InstrProfRecord {
>> -  InstrProfRecord() {}
>> -  InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t>
Counts)
>> -      : Name(Name), Hash(Hash), Counts(Counts) {}
>> -  StringRef Name;
>> -  uint64_t Hash;
>> -  ArrayRef<uint64_t> Counts;
>> -};
>> -
>>  /// A file format agnostic iterator over profiling data.
>>  class InstrProfIterator : public
std::iterator<std::input_iterator_tag,
>>                                                 InstrProfRecord> {
>> @@ -138,7 +131,7 @@
>>  /// that wrote the profile.
>>  template <class IntPtrT>
>>  class RawInstrProfReader : public InstrProfReader {
>> -private:
>> +protected:
>>    /// The profile data file contents.
>>    std::unique_ptr<MemoryBuffer> DataBuffer;
>>    /// The current set of counter values.
>> @@ -159,8 +152,8 @@
>>      const uint64_t CountersDelta;
>>      const uint64_t NamesDelta;
>>    };
>> -
>>    bool ShouldSwapBytes;
>> +  uint64_t FormatVersion;
>>    uint64_t CountersDelta;
>>    uint64_t NamesDelta;
>>    const ProfileData *Data;
>> @@ -171,17 +164,19 @@
>>    RawInstrProfReader(const RawInstrProfReader &) = delete;
>>    RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
>> +
>>  public:
>>    RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
>>        : DataBuffer(std::move(DataBuffer)) { }
>>    static bool hasFormat(const MemoryBuffer &DataBuffer);
>>    std::error_code readHeader() override;
>>    std::error_code readNextRecord(InstrProfRecord &Record) override;
>> -private:
>> +protected:
>>    std::error_code readNextHeader(const char *CurrentPos);
>>    std::error_code readHeader(const RawHeader &Header);
>> +  std::error_code readData(InstrProfRecord &Record);
>>    template <class IntT>
>>    IntT swap(IntT Int) const {
>>      return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
>> @@ -199,24 +194,82 @@
>>  typedef RawInstrProfReader<uint32_t> RawInstrProfReader32;
>>  typedef RawInstrProfReader<uint64_t> RawInstrProfReader64;
>> +/// Reader for the raw instrumented value profile binary format from
runtime.
>> +///
>> +/// This format is a raw memory dump of the instrumentation-based
value
>> profiling
>> +/// data from the runtime.  It has no index.
>> +///
>> +/// Templated on the unsigned type whose size matches pointers on the
platform
>> +/// that wrote the profile.
>> +template <class IntPtrT>
>> +class RawInstrValueProfReader : public RawInstrProfReader<IntPtrT> {
> As discussed in the other email, we don't need this class.

Done.

>> +private:
>> +  using Base = RawInstrProfReader<IntPtrT>;
>> +  using RawHeader = struct Base::RawHeader;
>> +  using ProfileData = struct Base::ProfileData;
>> +  using Base::Base;
>> +  using Base::swap;
>> +  using Base::getName;
>> +
>> +  struct ValueData {
>> +    ProfileData Data;
>> +    const IntPtrT FunctionPointer;
>> +    const uint32_t NumValueSites[instr_value_prof_kind::last]; +
const IntPtrT Values[instr_value_prof_kind::last]; // NULL, used
at
>> runtime
>> +  };
>> +  struct ValueHeader {
>> +    RawHeader Header;
>> +    const uint64_t ValuesSize;
>> +    const uint64_t ValuesDelta;
>> +  };
>> +  struct ValueEntry {
>> +    const uint64_t Value;
>> +    const uint64_t NumTaken;
>> +    const uint32_t ValueKind;
>> +    const uint32_t CounterIndex;
>> +    const IntPtrT VDataIndex;
>> +  };
>> +  const ValueData *VData;
>> +  const ValueData *VDataEnd;
>> +  const ValueEntry *CurrentVEntry;
>> +  uint64_t CurrentVDataIndex;
>> +  std::map<const IntPtrT, std::string> FunctionPtrToNameMap;
>> +
>> +  RawInstrValueProfReader(const RawInstrValueProfReader &) = delete; +
 RawInstrValueProfReader &operator=(const RawInstrValueProfReader &)
=
>> delete;
>> +
>> +public:
>> +  RawInstrValueProfReader(std::unique_ptr<MemoryBuffer> DataBuffer) +
    : Base(std::move(DataBuffer)) { }
>> +
>> +  static bool hasVPFormat(const MemoryBuffer &DataBuffer);
>> +  std::error_code readHeader() override;
>> +  std::error_code readNextRecord(InstrProfRecord &Record) override; +
+protected:
>> +  std::error_code readNextHeader(const char *CurrentPos);
>> +  std::error_code readHeader(const ValueHeader &VHeader);
>> +};
>> +
>> +typedef RawInstrValueProfReader<uint32_t> RawInstrValueProfReader32;
+typedef RawInstrValueProfReader<uint64_t> RawInstrValueProfReader64; +
>>  namespace IndexedInstrProf {
>>  enum class HashT : uint32_t;
>>  }
>>  /// Trait for lookups into the on-disk hash table for the binary
>> instrprof
>>  /// format.
>>  class InstrProfLookupTrait {
>> -  std::vector<uint64_t> DataBuffer;
>> +  std::vector<InstrProfRecord> DataBuffer;
> This means we're copying a lot more data around - we need to allocate
std::vectors for each set of counters and for each set of value
> profiling data, rather than not copying any data at all. I'm not sure if
it's a problem in practice, but we should be aware of the tradeoffs here.

Agreed. We need to make sure this does not create any performance issues
later. However, in practice, we have not observed any issues.

>>    IndexedInstrProf::HashT HashType;
>> +  unsigned FormatVersion;
>>  public:
>> -  InstrProfLookupTrait(IndexedInstrProf::HashT HashType) :
>> HashType(HashType) {}
>> +  InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned
FormatVersion)
>> +    : HashType(HashType), FormatVersion(FormatVersion) {}
>> +
>> +  typedef ArrayRef<InstrProfRecord> data_type;
>> -  struct data_type {
>> -    data_type(StringRef Name, ArrayRef<uint64_t> Data)
>> -        : Name(Name), Data(Data) {}
>> -    StringRef Name;
>> -    ArrayRef<uint64_t> Data;
>> -  };
>>    typedef StringRef internal_key_type;
>>    typedef StringRef external_key_type;
>>    typedef uint64_t hash_value_type;
>> @@ -240,21 +293,110 @@
>>    }
>>    data_type ReadData(StringRef K, const unsigned char *D, offset_type
>> N) {
>> -    DataBuffer.clear();
>> +    switch (FormatVersion) {
>> +      case 1:
>> +      case 2: return ReadDataV1V2(K, D, N);
>> +      case 3: return ReadDataV3(K, D, N);
>> +    }
>> +    return data_type();
>> +  }
>> +
>> +  data_type ReadDataV1V2(StringRef K, const unsigned char *D,
>> offset_type N) {
> These are too complex to leave in the header. Please move them to
InstrProfReader.cpp.

Done.

>> +    // Check if the data is corrupt. If so, don't try to read it.
>>      if (N % sizeof(uint64_t))
>> -      // The data is corrupt, don't try to read it.
>> -      return data_type("", DataBuffer);
>> +      return data_type();
>> +
>> +    DataBuffer.clear();
>> +    uint64_t NumCounts;
>> +    uint64_t NumEntries = N / sizeof(uint64_t);
>> +    std::vector<uint64_t> CounterBuffer;
>> +    for (uint64_t I = 0; I < NumEntries; I += NumCounts) {
>> +      using namespace support;
>> +      // The function hash comes first.
>> +      uint64_t Hash = endian::readNext<uint64_t, little,
unaligned>(D);
>> +
>> +      if (++I >= NumEntries)
>> +        return data_type();
>> +
>> +      // In v1, we have at least one count.
>> +      // Later, we have the number of counts.
>> +      NumCounts = (1 == FormatVersion) ? NumEntries - I :
>> +        endian::readNext<uint64_t, little, unaligned>(D);
>> +      if (1 != FormatVersion)
>> +        ++I;
>> +
>> +      // If we have more counts than data, this is bogus.
>> +      if (I + NumCounts > NumEntries)
>> +        return data_type();
>> +
>> +      CounterBuffer.clear();
>> +      for (unsigned J = 0; J < NumCounts; ++J)
>> +        CounterBuffer.push_back(endian::readNext<uint64_t, little,
unaligned>(D));
>> +
>> +      DataBuffer.push_back(InstrProfRecord(K, Hash, CounterBuffer)); +
   }
>> +    return DataBuffer;
>> +  }
>> +  data_type ReadDataV3(StringRef K, const unsigned char *D,
offset_type
>> N) {
>>      using namespace support;
>> -    // We just treat the data as opaque here. It's simpler to handle
in
>> -    // IndexedInstrProfReader.
>> -    unsigned NumEntries = N / sizeof(uint64_t);
>> -    DataBuffer.reserve(NumEntries);
>> -    for (unsigned I = 0; I < NumEntries; ++I)
>> -      DataBuffer.push_back(endian::readNext<uint64_t, little,
>> unaligned>(D));
>> -    return data_type(K, DataBuffer);
>> +    const unsigned char *Start = D;
>> +    if (D - Start + sizeof (uint64_t) > N)
>> +      return data_type();
>> +    uint64_t DataBufferSize = endian::readNext<uint64_t, little,
unaligned>(D);
>> +    DataBuffer.clear();
>> +    DataBuffer.reserve(DataBufferSize);
>> +    std::vector<uint64_t> CounterBuffer;
>> +    std::vector<InstrProfValueRecord> ValueBuffer;
>> +    for (unsigned I = 0; I < DataBufferSize; ++I) {
>> +      if (D - Start + sizeof (uint64_t) + sizeof(uint32_t) >= N) +
   return data_type();
>> +      uint64_t Hash = endian::readNext<uint64_t, little,
unaligned>(D);
>> +      uint32_t CountsSize = endian::readNext<uint32_t, little,
unaligned>(D);
>> +      if (D - Start + CountsSize * sizeof(uint64_t) >= N)
>> +        return data_type();
>> +      CounterBuffer.clear();
>> +      for (unsigned I = 0; I < CountsSize; ++I)
>> +        CounterBuffer.push_back(endian::readNext<uint64_t, little,
unaligned>(D));
>> +
>> +      DataBuffer.push_back(InstrProfRecord(K, Hash, CounterBuffer)); +
+      for (uint32_t Kind = instr_value_prof_kind::first;
>> +           Kind < instr_value_prof_kind::last; ++Kind) {
>> +        if (D - Start + 2 * sizeof(uint32_t) > N)
>> +          return data_type();
>> +        uint32_t ValueKind = endian::readNext<uint32_t, little,
unaligned>(D);
>> +        if (ValueKind != Kind)
>> +          return data_type();
>> +        uint32_t ValuesSize = endian::readNext<uint32_t, little,
unaligned>(D);
>> +        ValueBuffer.clear();
>> +        for (unsigned I = 0; I < ValuesSize; ++I) {
>> +          if (D - Start + sizeof(uint32_t) >= N) return data_type(); +
         uint32_t NameSize = endian::readNext<uint32_t, little,
unaligned>(D);
>> +          if (D - Start + NameSize * sizeof(char)>= N)
>> +            return data_type();
>> +
>> +          std::string Name((const char *)D, NameSize);
>> +          D += NameSize;
>> +
>> +          if (D - Start + 2 * sizeof(uint64_t) >= N)
>> +            return data_type();
>> +          uint64_t Value = endian::readNext<uint64_t, little,
>> unaligned>(D);
>> +          uint64_t NumTaken = endian::readNext<uint64_t, little,
unaligned>(D);
>> +
>> +          if (D - Start + sizeof(uint32_t) > N)
>> +            return data_type();
>> +          uint32_t CounterIndex = endian::readNext<uint32_t, little,
unaligned>(D);
>> +          ValueBuffer.push_back(
>> +            InstrProfValueRecord(Name, Value, NumTaken,
CounterIndex));
>> +        }
>> +        DataBuffer[DataBuffer.size()-1].Values[Kind] = ValueBuffer; +
    }
> I guess this'll be simpler without having to deal with the kinds, but
there's also a lot of "D - Start + ..." math in here that I think would be
easier to understand with a few more named variables.

Not addressed in the latest patch.

>> +    }
>> +    return DataBuffer;
>>    }
>>  };
>> +
>>  typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
>>      InstrProfReaderIndex;
>> @@ -267,8 +409,6 @@
>>    std::unique_ptr<InstrProfReaderIndex> Index;
>>    /// Iterator over the profile data.
>>    InstrProfReaderIndex::data_iterator RecordIterator;
> This name is a bit confusing now - it iterates over arrays of records,
not individual ones.

Not addressed in the latest patch.

>> -  /// Offset into our current data set.
>> -  size_t CurrentOffset;
>>    /// The file format version of the profile data.
>>    uint64_t FormatVersion;
>>    /// The maximal execution count among all functions.
>> @@ -278,7 +418,7 @@
>>    IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) =
>> delete;
>>  public:
>>    IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
>> -      : DataBuffer(std::move(DataBuffer)), Index(nullptr),
>> CurrentOffset(0) {}
>> +      : DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
>>    /// Return true if the given buffer is in an indexed instrprof
>> format.
>>    static bool hasFormat(const MemoryBuffer &DataBuffer);
>> @@ -288,9 +428,16 @@
>>    /// Read a single record.
>>    std::error_code readNextRecord(InstrProfRecord &Record) override;
>> -  /// Fill Counts with the profile data for the given function name. +
 /// Return profile data for the given function name and hash
>>    std::error_code getFunctionCounts(StringRef FuncName, uint64_t
>> FuncHash,
>> -                                    std::vector<uint64_t> &Counts); +
   std::vector<uint64_t> &Counts);
> It really does fill "Counts". It doesn't return anything, so the old doc
comment was better.

Done.

> Also the formatting is funny here and for the function below -
> clang-format will fix that for you.

Not addressed in the latest patch.

>> +
>> +  /// Return value profile data for the given function name and hash
and
>> +  /// value profiling kind
>> +  std::error_code getFunctionValuesForKind(StringRef FuncName, +
uint64_t FuncHash, uint32_t ValueKind,
>> +     std::vector<InstrProfValueRecord> &Values);
>> +
>>    /// Return the maximum of all known function counts.
>>    uint64_t getMaximumFunctionCount() { return MaxFunctionCount; }
>> Index: include/llvm/ProfileData/InstrProfRecord.h
>> =================================================================== ---
/dev/null
>> +++ include/llvm/ProfileData/InstrProfRecord.h
>> @@ -0,0 +1,67 @@
>> +//=-- InstrProfRecord.h - Instrumented profiling format support ---*-
C++ -*-=//
>> +//
>> +//                     The LLVM Compiler Infrastructure
>> +//
>> +// This file is distributed under the University of Illinois Open
Source
>> +// License. See LICENSE.TXT for details.
>> +//
>> +//===----------------------------------------------------------------------===//
+//
>> +// Instrumentation-based profiling data is generated by instrumented
+// binaries through library functions in compiler-rt, and read by the clang
>> +// frontend to feed PGO.
>> +//
>> +//===----------------------------------------------------------------------===//
+
>> +#ifndef LLVM_PROFILEDATA_INSTRPROFRECORD_H_
>> +#define LLVM_PROFILEDATA_INSTRPROFRECORD_H_
> We don't need this header - just put these definitions in InstrProf.h

Done.

>> +
>> +#include "llvm/ADT/SmallString.h"
>> +#include "llvm/ProfileData/InstrProf.h"
>> +
>> +namespace llvm {
>> +
>> +// Profiling information for a single target value
>> +struct InstrProfValueRecord {
>> +  InstrProfValueRecord() {};
>> +  InstrProfValueRecord(uint64_t Value, uint64_t NumTaken, uint32_t
CounterIndex)
>> +    : Value(Value), NumTaken(NumTaken), CounterIndex(CounterIndex) {}
+  InstrProfValueRecord(StringRef Name, uint64_t NumTaken, uint32_t
CounterIndex)
>> +    : Name(Name), NumTaken(NumTaken), CounterIndex(CounterIndex) {}
> What are these two constructors that don't initialize all of the fields
for? If we use them we're just asking for memory problems.

Done.

>> +  InstrProfValueRecord(StringRef Name, uint64_t Value, uint64_t
NumTaken,
>> +    uint32_t CounterIndex) : Name(Name), Value(Value),
>> NumTaken(NumTaken),
>> +    CounterIndex(CounterIndex) {}
>> +  SmallString<32> Name;
>> +  uint64_t Value;
>> +  uint64_t NumTaken;
>> +  uint32_t CounterIndex;
>> +};
>> +
>> +/// Profiling information for a single function.
>> +struct InstrProfRecord {
>> +  InstrProfRecord() {}
>> +  InstrProfRecord(StringRef Name, uint64_t Hash, ArrayRef<uint64_t>
Counts)
>> +    : Name(Name), Hash(Hash), Counts(Counts) {}
>> +  void clearValues() {
>> +    uint32_t i = instr_value_prof_kind::first;
>> +    for ( ; i < instr_value_prof_kind::last; ++i)
>> +      Values[i].clear();
>> +  }
>> +  StringRef Name;
>> +  uint64_t Hash;
>> +  std::vector<uint64_t> Counts;
>> +  std::vector<InstrProfValueRecord>
>> Values[instr_value_prof_kind::last];
>> +};
>> +
>> +/// Profiling information for a single function for outputting through
the
>> +/// IndexedInstrProfWriter
>> +struct IndexedInstrProfRecord {
> This only used in the writer - better to define it there locally.

Done.

>> +  IndexedInstrProfRecord() {}
>> +  IndexedInstrProfRecord(ArrayRef<uint64_t> Counts) : Counts(Counts)
{}
>> +  std::vector<uint64_t> Counts;
>> +  std::vector<InstrProfValueRecord>
>> Values[instr_value_prof_kind::last];
>> +};
>> +
>> +} // end namespace llvm
>> +
>> +#endif // LLVM_PROFILEDATA_INSTRPROFRECORD_H_
>> Index: include/llvm/ProfileData/InstrProfWriter.h
>> =================================================================== ---
include/llvm/ProfileData/InstrProfWriter.h
>> +++ include/llvm/ProfileData/InstrProfWriter.h
>> @@ -19,6 +19,7 @@
>>  #include "llvm/ADT/DenseMap.h"
>>  #include "llvm/ADT/StringMap.h"
>>  #include "llvm/ProfileData/InstrProf.h"
>> +#include "llvm/ProfileData/InstrProfRecord.h"
>>  #include "llvm/Support/DataTypes.h"
>>  #include "llvm/Support/MemoryBuffer.h"
>>  #include "llvm/Support/raw_ostream.h"
>> @@ -29,20 +30,18 @@
>>  /// Writer for instrumentation based profile data.
>>  class InstrProfWriter {
>>  public:
>> -  typedef SmallDenseMap<uint64_t, std::vector<uint64_t>, 1>
>> CounterData;
>> +  typedef SmallDenseMap<uint64_t, IndexedInstrProfRecord, 1>
>> ProfilingData;
>>  private:
>> -  StringMap<CounterData> FunctionData;
>> +  StringMap<ProfilingData> FunctionData;
>>    uint64_t MaxFunctionCount;
>>  public:
>>    InstrProfWriter() : MaxFunctionCount(0) {}
>>    /// Add function counts for the given function. If there are already
>> counts
>>    /// for this function and the hash and number of counts match, each
>> counter is
>>    /// summed.
>> -  std::error_code addFunctionCounts(StringRef FunctionName,
>> -                                    uint64_t FunctionHash,
>> -                                    ArrayRef<uint64_t> Counters); -
/// Write the profile to \c OS
>> +  std::error_code addFunctionCounts(const InstrProfRecord &I);
> Might as well rename this to addRecord - it does more than function
counts now.

Done.

>> +  /// Ensure that all data is written to disk.
>>    void write(raw_fd_ostream &OS);
>>    /// Write the profile, returning the raw data. For testing.
>>    std::unique_ptr<MemoryBuffer> writeBuffer();
>> Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
>> =================================================================== ---
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
>> +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
>> @@ -4896,7 +4896,8 @@
>>    }
>>    case Intrinsic::instrprof_increment:
>>      llvm_unreachable("instrprof failed to lower an increment");
>> -
>> +  case Intrinsic::instrprof_instrument_target:
>> +    llvm_unreachable("instrprof failed to lower an value profiling
call");
>>    case Intrinsic::frameescape: {
>>      MachineFunction &MF = DAG.getMachineFunction();
>>      const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
>> Index: lib/ProfileData/InstrProfIndexed.h
>> =================================================================== ---
lib/ProfileData/InstrProfIndexed.h
>> +++ lib/ProfileData/InstrProfIndexed.h
>> @@ -47,7 +47,7 @@
>>  }
>>  const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
>> -const uint64_t Version = 2;
>> +const uint64_t Version = 3;
>>  const HashT HashType = HashT::MD5;
>>  }
>> Index: lib/ProfileData/InstrProfReader.cpp
>> =================================================================== ---
lib/ProfileData/InstrProfReader.cpp
>> +++ lib/ProfileData/InstrProfReader.cpp
>> @@ -52,6 +52,10 @@
>>    // Create the reader.
>>    if (IndexedInstrProfReader::hasFormat(*Buffer))
>>      Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
>> +  else if (RawInstrValueProfReader64::hasVPFormat(*Buffer))
>> +    Result.reset(new RawInstrValueProfReader64(std::move(Buffer))); +
else if (RawInstrValueProfReader32::hasVPFormat(*Buffer))
>> +    Result.reset(new RawInstrValueProfReader32(std::move(Buffer)));
>>    else if (RawInstrProfReader64::hasFormat(*Buffer))
>>      Result.reset(new RawInstrProfReader64(std::move(Buffer)));
>>    else if (RawInstrProfReader32::hasFormat(*Buffer))
>> @@ -183,8 +187,6 @@
>>  template <class IntPtrT>
>>  std::error_code RawInstrProfReader<IntPtrT>::readHeader() {
>> -  if (!hasFormat(*DataBuffer))
>> -    return error(instrprof_error::bad_magic);
>>    if (DataBuffer->getBufferSize() < sizeof(RawHeader))
>>      return error(instrprof_error::bad_header);
>>    auto *Header =
>> @@ -256,11 +258,7 @@
>>  template <class IntPtrT>
>>  std::error_code
>> -RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
-  if (Data == DataEnd)
>> -    if (std::error_code EC = readNextHeader(ProfileEnd))
>> -      return EC;
>> -
>> +RawInstrProfReader<IntPtrT>::readData(InstrProfRecord &Record) {
>>    // Get the raw data.
>>    StringRef RawName(getName(Data->NamePtr), swap(Data->NameSize));
uint32_t NumCounters = swap(Data->NumCounters);
>> @@ -288,6 +286,20 @@
>>    } else
>>      Record.Counts = RawCounts;
>> +  return success();
>> +}
>> +
>> +template <class IntPtrT>
>> +std::error_code
>> +RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
+
>> +  if (Data == DataEnd)
>> +    if (std::error_code EC = readNextHeader(ProfileEnd))
>> +      return EC;
>> +
>> +  if (std::error_code EC = readData(Record))
>> +    return EC;
>> +
>>    // Iterate.
>>    ++Data;
>>    return success();
>> @@ -298,6 +310,174 @@
>>  template class RawInstrProfReader<uint64_t>;
>>  }
>> +template <class IntPtrT>
>> +static uint64_t getRawVPMagic();
>> +
>> +template <>
>> +uint64_t getRawVPMagic<uint64_t>() {
>> +  return
>> +    uint64_t(255) << 56 |
>> +    uint64_t('v') << 48 |
>> +    uint64_t('p') << 40 |
>> +    uint64_t('r') << 32 |
>> +    uint64_t('o') << 24 |
>> +    uint64_t('f') << 16 |
>> +    uint64_t('r') <<  8 |
>> +    uint64_t(129);
>> +}
>> +
>> +template <>
>> +uint64_t getRawVPMagic<uint32_t>() {
>> +  return
>> +    uint64_t(255) << 56 |
>> +    uint64_t('v') << 48 |
>> +    uint64_t('p') << 40 |
>> +    uint64_t('r') << 32 |
>> +    uint64_t('o') << 24 |
>> +    uint64_t('f') << 16 |
>> +    uint64_t('R') <<  8 |
>> +    uint64_t(129);
>> +}
>> +
>> +template <class IntPtrT>
>> +bool RawInstrValueProfReader<IntPtrT>::hasVPFormat(
>> +  const MemoryBuffer &DataBuffer) {
>> +
>> +  if (DataBuffer.getBufferSize() < sizeof(uint64_t))
>> +    return false;
>> +  uint64_t Magic =
>> +    *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
+  return getRawVPMagic<IntPtrT>() == Magic ||
>> +    sys::getSwappedBytes(getRawVPMagic<IntPtrT>()) == Magic;
>> +}
>> +
>> +template <class IntPtrT>
>> +std::error_code RawInstrValueProfReader<IntPtrT>::readHeader() { +  if
(Base::DataBuffer->getBufferSize() < sizeof(ValueHeader)) +    return
Base::error(instrprof_error::bad_header);
>> +  auto *VHeader =
>> +    reinterpret_cast<const ValueHeader
>> *>(Base::DataBuffer->getBufferStart());
>> +  Base::ShouldSwapBytes = VHeader->Header.Magic !=
>> getRawVPMagic<IntPtrT>();
>> +  return readHeader(*VHeader);
>> +}
>> +
>> +template <class IntPtrT>
>> +std::error_code
>> +RawInstrValueProfReader<IntPtrT>::readNextHeader(const char
>> *CurrentPos) {
>> +  const char *End = Base::DataBuffer->getBufferEnd();
>> +  // Skip zero padding between profiles.
>> +  while (CurrentPos != End && *CurrentPos == 0)
>> +    ++CurrentPos;
>> +  // If there's nothing left, we're done.
>> +  if (CurrentPos == End)
>> +    return instrprof_error::eof;
>> +  // If there isn't enough space for another header, this is probably
just
>> +  // garbage at the end of the file.
>> +  if (CurrentPos + sizeof(ValueHeader) > End)
>> +    return instrprof_error::malformed;
>> +  // The writer ensures each profile is padded to start at an aligned
address.
>> +  if (reinterpret_cast<size_t>(CurrentPos) % alignOf<uint64_t>()) +
return instrprof_error::malformed;
>> +  // The magic should have the same byte order as in the previous
header.
>> +  uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); +
if (Magic != swap(getRawVPMagic<IntPtrT>()))
>> +    return instrprof_error::bad_magic;
>> +
>> +  // There's another profile to read, so we need to process the
header.
>> +  auto *VHeader = reinterpret_cast<const ValueHeader *>(CurrentPos); +
 return readHeader(*VHeader);
>> +}
>> +
>> +template <class IntPtrT>
>> +std::error_code
>> +RawInstrValueProfReader<IntPtrT>::readHeader(const ValueHeader
&VHeader) {
>> +  const RawHeader &Header = VHeader.Header;
>> +  Base::CountersDelta = swap(Header.CountersDelta);
>> +  Base::NamesDelta = swap(Header.NamesDelta);
>> +  auto DataSize = swap(Header.DataSize);
>> +  auto CountersSize = swap(Header.CountersSize);
>> +  auto NamesSize = swap(Header.NamesSize);
>> +  auto PaddingSize = sizeof(uint64_t) - NamesSize % sizeof(uint64_t);
+  auto ValuesSize = swap(VHeader.ValuesSize);
>> +  auto ValuesSizeInBytes = ValuesSize * sizeof(ValueEntry);
>> +
>> +  ptrdiff_t DataOffset = sizeof(ValueHeader);
>> +  ptrdiff_t CountersOffset = DataOffset + sizeof(ValueData) *
DataSize;
>> +  ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) *
>> CountersSize;
>> +  ptrdiff_t ValuesOffset = NamesOffset +
>> +    sizeof(char) * (NamesSize + PaddingSize);
>> +  size_t ProfileSize = ValuesOffset + ValuesSizeInBytes;
>> +
>> +  auto *Start = reinterpret_cast<const char *>(&VHeader);
>> +  if (Start + ProfileSize > Base::DataBuffer->getBufferEnd())
>> +    return Base::error(instrprof_error::bad_header);
>> +
>> +  CurrentVDataIndex = 0;
>> +  VData = reinterpret_cast<const ValueData *>(Start + DataOffset); +
VDataEnd = VData + DataSize;
>> +  Base::CountersStart =
>> +    reinterpret_cast<const uint64_t *>(Start + CountersOffset); +
Base::NamesStart = Start + NamesOffset;
>> +  CurrentVEntry = reinterpret_cast<const ValueEntry *>(Start +
ValuesOffset);
>> +  Base::ProfileEnd = Start + ProfileSize;
>> +
>> +  FunctionPtrToNameMap.clear();
>> +  for (const ValueData *I = VData; I != VDataEnd; ++I) {
>> +    std::string RawFunctionName(getName(I->Data.NamePtr),
>> swap(I->Data.NameSize));
>> +    FunctionPtrToNameMap.insert(std::pair<const IntPtrT, std::string>
+      (swap(I->FunctionPointer), RawFunctionName));
>> +  }
>> +  return Base::success();
>> +}
>> +
>> +template <class IntPtrT>
>> +std::error_code
>> +RawInstrValueProfReader<IntPtrT>::readNextRecord(InstrProfRecord
&Record) {
>> +  if (VData == VDataEnd)
>> +    if (std::error_code EC = readNextHeader(Base::ProfileEnd)) +
return EC;
>> +
>> +  Base::Data = &(VData->Data);
>> +  if (std::error_code EC = Base::readData(Record))
>> +    if (EC != Base::success())
>> +      return EC;
>> +
>> +  // Read value profiling data.
>> +  auto *BufferEnd = Base::DataBuffer->getBufferEnd();
>> +  Record.clearValues();
>> +  while ((const char*)(CurrentVEntry+1) <= BufferEnd) {
>> +    if (CurrentVEntry->VDataIndex != CurrentVDataIndex)
>> +      break;
>> +    std::string TargetFunctionName;
>> +    if (CurrentVEntry->ValueKind ==
>> +        instr_value_prof_kind::indirect_call_target) {
>> +      typename std::map<const IntPtrT, std::string>::iterator I = +
    FunctionPtrToNameMap.find(swap(CurrentVEntry->Value)); +      if (I
== FunctionPtrToNameMap.end()) {
>> +        ++CurrentVEntry;
>> +        continue;
>> +      }
>> +      TargetFunctionName = I->second;
>> +    }
>> +    InstrProfValueRecord NewVEntry;
>> +    NewVEntry.Name = TargetFunctionName;
>> +    NewVEntry.Value = swap(CurrentVEntry->Value);
>> +    NewVEntry.NumTaken = swap(CurrentVEntry->NumTaken);
>> +    NewVEntry.CounterIndex = swap(CurrentVEntry->CounterIndex); +
Record.Values[CurrentVEntry->ValueKind].push_back(NewVEntry); +   
++CurrentVEntry;
>> +  }
>> +  // Iterate.
>> +  ++CurrentVDataIndex;
>> +  ++VData;
>> +  return Base::success();
>> +}
>> +
>> +namespace llvm {
>> +template class RawInstrValueProfReader<uint32_t>;
>> +template class RawInstrValueProfReader<uint64_t>;
>> +}
>> +
>>  InstrProfLookupTrait::hash_value_type
>>  InstrProfLookupTrait::ComputeHash(StringRef K) {
>>    return IndexedInstrProf::ComputeHash(HashType, K);
>> @@ -340,38 +520,55 @@
>>    if (HashType > IndexedInstrProf::HashT::Last)
>>      return error(instrprof_error::unsupported_hash_type);
>>    uint64_t HashOffset = endian::readNext<uint64_t, little,
>> unaligned>(Cur);
>> -
>>    // The rest of the file is an on disk hash table.
>>    Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur,
>> Start,
>> -
>> InstrProfLookupTrait(HashType)));
>> +      InstrProfLookupTrait(HashType, FormatVersion)));
>>    // Set up our iterator for readNextRecord.
>>    RecordIterator = Index->data_begin();
>>    return success();
>>  }
>>  std::error_code IndexedInstrProfReader::getFunctionCounts(
>> -    StringRef FuncName, uint64_t FuncHash, std::vector<uint64_t>
&Counts) {
>> +  StringRef FuncName, uint64_t FuncHash,
>> +  std::vector<uint64_t> &Counts) {
>> +
>>    auto Iter = Index->find(FuncName);
>>    if (Iter == Index->end())
>>      return error(instrprof_error::unknown_function);
>>    // Found it. Look for counters with the right hash.
>> -  ArrayRef<uint64_t> Data = (*Iter).Data;
>> -  uint64_t NumCounts;
>> -  for (uint64_t I = 0, E = Data.size(); I != E; I += NumCounts) { -
// The function hash comes first.
>> -    uint64_t FoundHash = Data[I++];
>> -    // In v1, we have at least one count. Later, we have the number of
counts.
>> -    if (I == E)
>> -      return error(instrprof_error::malformed);
>> -    NumCounts = FormatVersion == 1 ? E - I : Data[I++];
>> -    // If we have more counts than data, this is bogus.
>> -    if (I + NumCounts > E)
>> -      return error(instrprof_error::malformed);
>> +  ArrayRef<InstrProfRecord> Data = (*Iter);
>> +  if (Data.empty())
>> +    return error(instrprof_error::malformed);
>> +
>> +  for (unsigned I = 0, E = Data.size(); I < E; ++I) {
>> +    // Check for a match and fill the vector if there is one.
>> +    if (Data[I].Hash == FuncHash) {
>> +      Counts = Data[I].Counts;
>> +      return success();
>> +    }
>> +  }
>> +  return error(instrprof_error::hash_mismatch);
>> +}
>> +
>> +std::error_code IndexedInstrProfReader::getFunctionValuesForKind( +
StringRef FuncName, uint64_t FuncHash, uint32_t ValueKind,
>> +  std::vector<InstrProfValueRecord> &Values) {
>> +
>> +  auto Iter = Index->find(FuncName);
>> +  if (Iter == Index->end())
>> +    return error(instrprof_error::unknown_function);
>> +
>> +  // Found it. Look for counters with the right hash.
>> +  ArrayRef<InstrProfRecord> Data = (*Iter);
>> +  if (Data.empty())
>> +    return error(instrprof_error::malformed);
>> +
>> +  for (unsigned I = 0, E = Data.size(); I < E; ++I) {
>>      // Check for a match and fill the vector if there is one.
>> -    if (FoundHash == FuncHash) {
>> -      Counts = Data.slice(I, NumCounts);
>> +    if (Data[I].Hash == FuncHash) {
>> +      Values = Data[I].Values[ValueKind];
>>        return success();
>>      }
>>    }
>> @@ -384,30 +581,14 @@
>>    if (RecordIterator == Index->data_end())
>>      return error(instrprof_error::eof);
>> -  // Record the current function name.
>> -  Record.Name = (*RecordIterator).Name;
>> -
>> -  ArrayRef<uint64_t> Data = (*RecordIterator).Data;
>> -  // Valid data starts with a hash and either a count or the number of
counts.
>> -  if (CurrentOffset + 1 > Data.size())
>> -    return error(instrprof_error::malformed);
>> -  // First we have a function hash.
>> -  Record.Hash = Data[CurrentOffset++];
>> -  // In version 1 we knew the number of counters implicitly, but in
newer
>> -  // versions we store the number of counters next.
>> -  uint64_t NumCounts =
>> -      FormatVersion == 1 ? Data.size() - CurrentOffset :
>> Data[CurrentOffset++];
>> -  if (CurrentOffset + NumCounts > Data.size())
>> +  if ((*RecordIterator).empty())
>>      return error(instrprof_error::malformed);
>> -  // And finally the counts themselves.
>> -  Record.Counts = Data.slice(CurrentOffset, NumCounts);
>> -  // If we've exhausted this function's data, increment the record. -
CurrentOffset += NumCounts;
>> -  if (CurrentOffset == Data.size()) {
>> +  static unsigned RecordIndex = 0;
>> +  Record = (*RecordIterator)[RecordIndex++];
>> +  if (RecordIndex >= (*RecordIterator).size()) {
> There're a lot of uses of (*RecordIterator) in here, which is awkward
for reading - a named variable would be nice.

Done.

>>      ++RecordIterator;
>> -    CurrentOffset = 0;
>> +    RecordIndex = 0;
>>    }
>> -
>>    return success();
>>  }
>> Index: lib/ProfileData/InstrProfWriter.cpp
>> =================================================================== ---
lib/ProfileData/InstrProfWriter.cpp
>> +++ lib/ProfileData/InstrProfWriter.cpp
>> @@ -26,8 +26,8 @@
>>    typedef StringRef key_type;
>>    typedef StringRef key_type_ref;
>> -  typedef const InstrProfWriter::CounterData *const data_type; -
typedef const InstrProfWriter::CounterData *const data_type_ref; + 
typedef const InstrProfWriter::ProfilingData *const data_type; +
typedef const InstrProfWriter::ProfilingData *const data_type_ref;
>>    typedef uint64_t hash_value_type;
>>    typedef uint64_t offset_type;
>> @@ -44,9 +44,25 @@
>>      offset_type N = K.size();
>>      LE.write<offset_type>(N);
>> -    offset_type M = 0;
>> -    for (const auto &Counts : *V)
>> -      M += (2 + Counts.second.size()) * sizeof(uint64_t);
>> +    offset_type M = sizeof(uint64_t); // size of ProfileData
>> +    for (const auto &ProfileData : *V) {
>> +      M += sizeof(uint64_t); // size of ProfileData.first
>> +      M += sizeof(uint32_t); // size of
>> ProfileData.second.Counts.size()
>> +      M += ProfileData.second.Counts.size() * sizeof(uint64_t); +
for (uint32_t Kind = instr_value_prof_kind::first;
>> +           Kind < instr_value_prof_kind::last; ++Kind) {
>> +        M += sizeof(uint32_t); // size of Kind
>> +        M += sizeof(uint32_t); // size of
>> ProfileData.second.Values[Kind].size()
>> +        for (InstrProfValueRecord I : ProfileData.second.Values[Kind])
{
>> +          M += sizeof(uint32_t);  // size of I.Name
>> +          M += I.Name.str().size() * sizeof(char);
>> +          M += sizeof(uint64_t);  // size of I.Value
>> +          M += sizeof(uint64_t);  // size of I.NumTaken
>> +          M += sizeof(uint32_t);  // size of I.CounterIndex
>> +        }
>> +      }
>> +    }
>> +
>>      LE.write<offset_type>(M);
>>      return std::make_pair(N, M);
>> @@ -61,51 +77,98 @@
>>      using namespace llvm::support;
>>      endian::Writer<little> LE(Out);
>> -    for (const auto &Counts : *V) {
>> -      LE.write<uint64_t>(Counts.first);
>> -      LE.write<uint64_t>(Counts.second.size());
>> -      for (uint64_t I : Counts.second)
>> +    LE.write<uint64_t>(V->size());
>> +    for (const auto &ProfileData : *V) {
>> +      LE.write<uint64_t>(ProfileData.first);
>> +      LE.write<uint32_t>(ProfileData.second.Counts.size());
>> +      for (uint64_t I : ProfileData.second.Counts)
>>          LE.write<uint64_t>(I);
>> +      for (uint32_t Kind = instr_value_prof_kind::first;
>> +           Kind < instr_value_prof_kind::last; ++Kind) {
>> +        LE.write<uint32_t>(Kind);
>> +        LE.write<uint32_t>(ProfileData.second.Values[Kind].size()); +
      for (InstrProfValueRecord I : ProfileData.second.Values[Kind]) {
>> +          LE.write<uint32_t>(I.Name.str().size());
>> +          Out.write(I.Name.c_str(), I.Name.str().size());
>> +          LE.write<uint64_t>(I.Value);
>> +          LE.write<uint64_t>(I.NumTaken);
>> +          LE.write<uint32_t>(I.CounterIndex);
>> +        }
>> +      }
>>      }
>>    }
>>  };
>>  }
>> -std::error_code
>> -InstrProfWriter::addFunctionCounts(StringRef FunctionName,
>> -                                   uint64_t FunctionHash,
>> -                                   ArrayRef<uint64_t> Counters) { -
auto &CounterData = FunctionData[FunctionName];
>> -
>> -  auto Where = CounterData.find(FunctionHash);
>> -  if (Where == CounterData.end()) {
>> -    // We've never seen a function with this name and hash, add it. -
  CounterData[FunctionHash] = Counters;
>> -    // We keep track of the max function count as we go for
simplicity.
>> -    if (Counters[0] > MaxFunctionCount)
>> -      MaxFunctionCount = Counters[0];
>> -    return instrprof_error::success;
>> -  }
>> +static std::error_code combineInstrProfRecords(
>> +  IndexedInstrProfRecord &Dest, const InstrProfRecord &Source, +
uint64_t &MaxFunctionCount) {
>> -  // We're updating a function we've seen before.
>> -  auto &FoundCounters = Where->second;
>> -  // If the number of counters doesn't match we either have bad data
or
>> a hash
>> -  // collision.
>> -  if (FoundCounters.size() != Counters.size())
>> +  // If the number of counters doesn't match we either have bad data +
 // or a hash collision.
>> +  if (Dest.Counts.size() != Source.Counts.size())
>>      return instrprof_error::count_mismatch;
>> -  for (size_t I = 0, E = Counters.size(); I < E; ++I) {
>> -    if (FoundCounters[I] + Counters[I] < FoundCounters[I])
>> +  for (size_t I = 0, E = Source.Counts.size(); I < E; ++I) {
>> +    if (Dest.Counts[I] + Source.Counts[I] < Dest.Counts[I])
>>        return instrprof_error::counter_overflow;
>> -    FoundCounters[I] += Counters[I];
>> +    Dest.Counts[I] += Source.Counts[I];
>> +  }
>> +
>> +  for (uint32_t Kind = instr_value_prof_kind::first;
>> +       Kind < instr_value_prof_kind::last; ++Kind) {
>> +    std::vector<InstrProfValueRecord>::const_iterator I =
>> +      Source.Values[Kind].begin();
>> +    for ( ; I != Source.Values[Kind].end(); ++I) {
>> +      bool Found = false;
>> +      std::vector<InstrProfValueRecord>::iterator J =
>> Dest.Values[Kind].begin();
>> +      for ( ; J != Dest.Values[Kind].end(); ++J) {
>> +        if (J->CounterIndex == I->CounterIndex) {
>> +          if ((Kind == instr_value_prof_kind::indirect_call_target) &&
+              (0 == J->Name.compare(I->Name)))
>> +            Found = true;
>> +          else if (J->Value == I->Value)
>> +            Found = true;
>> +
>> +          if (Found) {
>> +            J->NumTaken += I->NumTaken;
>> +            break;
>> +          }
>> +        }
>> +      }
>> +      if (!Found) Dest.Values[Kind].push_back(*I);
>> +    }
>>    }
>> +
>>    // We keep track of the max function count as we go for simplicity.
>> -  if (FoundCounters[0] > MaxFunctionCount)
>> -    MaxFunctionCount = FoundCounters[0];
>> +  if (Dest.Counts[0] > MaxFunctionCount)
>> +    MaxFunctionCount = Dest.Counts[0];
>>    return instrprof_error::success;
>>  }
>> +std::error_code
>> +InstrProfWriter::addFunctionCounts(const InstrProfRecord &I) { +  auto
&ProfileDataMap = FunctionData[I.Name];
>> +
>> +  auto Where = ProfileDataMap.find(I.Hash);
>> +  if (Where == ProfileDataMap.end()) {
>> +    // We've never seen a function with this name and hash, add it. +
  ProfileDataMap[I.Hash] = IndexedInstrProfRecord(I.Counts); +    for
(uint32_t Kind = instr_value_prof_kind::first;
>> +         Kind < instr_value_prof_kind::last; ++Kind)
>> +      ProfileDataMap[I.Hash].Values[Kind] = I.Values[Kind];
>> +
>> +    // We keep track of the max function count as we go for
simplicity.
>> +    if (I.Counts[0] > MaxFunctionCount)
>> +      MaxFunctionCount = I.Counts[0];
>> +    return instrprof_error::success;
>> +  }
>> +
>> +  // We're updating a function we've seen before.
>> +  return combineInstrProfRecords(Where->second, I, MaxFunctionCount);
+}
>> +
>>  std::pair<uint64_t, uint64_t> InstrProfWriter::writeImpl(raw_ostream
>> &OS) {
>>    OnDiskChainedHashTableGenerator<InstrProfRecordTrait> Generator;
>> Index: lib/Transforms/Instrumentation/InstrProfiling.cpp
>> =================================================================== ---
lib/Transforms/Instrumentation/InstrProfiling.cpp
>> +++ lib/Transforms/Instrumentation/InstrProfiling.cpp
>> @@ -7,9 +7,9 @@
>>  //
>>  //===----------------------------------------------------------------------===//
//
>> -// This pass lowers instrprof_increment intrinsics emitted by a
frontend for
>> -// profiling. It also builds the data structures and initialization
code needed
>> -// for updating execution counts and emitting the profile at runtime.
+// This pass lowers instrprof_* intrinsics emitted by a frontend for
profiling.
>> +// It also builds the data structures and initialization code needed
for
>> +// updating execution counts and emitting the profile at runtime.
>>  //
>>  //===----------------------------------------------------------------------===//
>> @@ -19,6 +19,7 @@
>>  #include "llvm/IR/IRBuilder.h"
>>  #include "llvm/IR/IntrinsicInst.h"
>>  #include "llvm/IR/Module.h"
>> +#include "llvm/ProfileData/InstrProf.h"
>>  #include "llvm/Transforms/Utils/ModuleUtils.h"
>>  using namespace llvm;
>> @@ -49,7 +50,15 @@
>>  private:
>>    InstrProfOptions Options;
>>    Module *M;
>> -  DenseMap<GlobalVariable *, GlobalVariable *> RegionCounters; +
typedef struct PerFunctionProfileData {
>> +    uint32_t NumValueSites[instr_value_prof_kind::last];
>> +    GlobalVariable* RegionCounters;
>> +    GlobalVariable* DataVar;
>> +    PerFunctionProfileData() : RegionCounters(nullptr),
>> DataVar(nullptr) {
>> +      memset(NumValueSites, 0, instr_value_prof_kind::last);
>> +    }
>> +  } PerFunctionProfileData;
>> +  DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
>>    std::vector<Value *> UsedVars;
>>    bool isMachO() const {
>> @@ -76,6 +85,12 @@
>>      return isMachO() ? "__DATA,__llvm_covmap" : "__llvm_covmap";
>>    }
>> +  /// Count the number of instrumented value sites for the function. +
 void computeNumValueSiteCounts(InstrProfInstrumentTargetInst *Ins); +
>> +  /// Replace instrprof_instrument_target with a call to runtime
library.
>> +  void lowerInstrumentTargetInst(InstrProfInstrumentTargetInst *Ins);
+
>>    /// Replace instrprof_increment with an increment of the appropriate
>> value.
>>    void lowerIncrement(InstrProfIncrementInst *Inc);
>> @@ -117,20 +132,39 @@
>>    bool MadeChange = false;
>>    this->M = &M;
>> -  RegionCounters.clear();
>> +  ProfileDataMap.clear();
>>    UsedVars.clear();
>> +  // We did not know how many value sites there would be inside +  //
the instrumented function. This is counting the number of instrumented
>> +  // target value sites to enter it as field in the profile data
variable.
>> +  for (Function &F : M)
>> +    for (BasicBlock &BB : F)
>> +      for (auto I = BB.begin(), E = BB.end(); I != E;)
>> +        if (auto *Ind = dyn_cast<InstrProfInstrumentTargetInst>(I++))
+          computeNumValueSiteCounts(Ind);
> The frontend should pass the number of value sites in the function in
the arguments to the intrinsic, like we do for the number of counters in
the increment intrinsic. In addition to being simpler, it's more robust
against things like dead code elimination which could remove the call to
an intrinsic completely.

Unfortunately, this is not straightforwardly doable. We do not know the
number of target expressions we'd like to profile prior to inserting the
intrinsics. This is due to the fact that AST level expressions are not one
to one mappable to the generated LLVM IR level expressions. In the case of
indirect calls for instance the number of calls generated per a single
CXXDestructor could be more than one.

Instead the current implementation assigns an index by incrementing the
NumValueSites counter at each point where a target expression is detected.
The index value is passed along to the InstrProfiling.cpp as an argument
of the intrinsic. Then NumValueSites is determined by taking the max of
all indices recorded for a function + 1.

>> +
>>    for (Function &F : M)
>>      for (BasicBlock &BB : F)
>>        for (auto I = BB.begin(), E = BB.end(); I != E;)
>>          if (auto *Inc = dyn_cast<InstrProfIncrementInst>(I++)) {
>>            lowerIncrement(Inc);
>>            MadeChange = true;
>>          }
>> +
>> +  for (Function &F : M)
>> +    for (BasicBlock &BB : F)
>> +      for (auto I = BB.begin(), E = BB.end(); I != E;)
>> +        if (auto *Ind = dyn_cast<InstrProfInstrumentTargetInst>(I++))
{
>> +          lowerInstrumentTargetInst(Ind);
>> +          MadeChange = true;
>> +        }
> It seems like we could do both of these in one loop, no?

Not addressed in this patch.

>> +
>>    if (GlobalVariable *Coverage =
>> M.getNamedGlobal("__llvm_coverage_mapping")) {
>>      lowerCoverageData(Coverage);
>>      MadeChange = true;
>>    }
>> +
>>    if (!MadeChange)
>>      return false;
>> @@ -141,13 +175,62 @@
>>    return true;
>>  }
>> +static Constant *getOrInsertValueProfilingCall(Module &M) {
>> +  auto *VoidTy = Type::getVoidTy(M.getContext());
>> +  auto *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
>> +  auto *Int32Ty = Type::getInt32Ty(M.getContext());
>> +  auto *Int64Ty = Type::getInt64Ty(M.getContext());
>> +  Type *ArgTypes[] = {Int32Ty, Int64Ty, VoidPtrTy, Int32Ty};
>> +  auto *ValueProfilingCallTy = FunctionType::get(VoidTy,
>> +    makeArrayRef(ArgTypes), false);
>> +  return M.getOrInsertFunction("__llvm_profile_instrument_target", +
 ValueProfilingCallTy);
>> +}
>> +
>> +void InstrProfiling::computeNumValueSiteCounts(
>> +  InstrProfInstrumentTargetInst *Ind) {
>> +
>> +  GlobalVariable *Name = Ind->getName();
>> +  uint32_t ValueKind = Ind->getValueKind()->getZExtValue();
>> +  uint64_t Index = Ind->getIndex()->getZExtValue();
>> +  auto It = ProfileDataMap.find(Name);
>> +  if (It == ProfileDataMap.end()) {
>> +    PerFunctionProfileData PD;
>> +    PD.NumValueSites[ValueKind] = Index + 1;
>> +    ProfileDataMap[Name] = PD;
>> +  }
>> +  else if (It->second.NumValueSites[ValueKind] <= Index)
>> +    It->second.NumValueSites[ValueKind] = Index + 1;
>> +}
>> +
>> +void InstrProfiling::lowerInstrumentTargetInst(
>> +  InstrProfInstrumentTargetInst *Ind) {
>> +
>> +  GlobalVariable *Name = Ind->getName();
>> +  auto It = ProfileDataMap.find(Name);
>> +  assert(It != ProfileDataMap.end() && It->second.DataVar &&
>> +    "value profiling detected in function with no counter
incerement");
>> +
>> +  GlobalVariable *DataVar = It->second.DataVar;
>> +  uint32_t ValueKind = Ind->getValueKind()->getZExtValue();
>> +  uint64_t Index = Ind->getIndex()->getZExtValue();
>> +  IRBuilder<> Builder(Ind->getParent(), *Ind);
>> +  Value* Args[4] = {Builder.getInt32(ValueKind),
>> +    Ind->getTargetValue(),
>> +    Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
> Presumably we'll pass the actual address of the first
> __llvm_profile_value_data here if we allocate that statically.

Dynamic allocation is favored over static allocation.

>> +    Builder.getInt32(Index)};
>> +  Ind->replaceAllUsesWith(
>> +    Builder.CreateCall(getOrInsertValueProfilingCall(*M), Args)); +
Ind->eraseFromParent();
>> +}
>> +
>>  void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
>>    GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
>>    IRBuilder<> Builder(Inc->getParent(), *Inc);
>>    uint64_t Index = Inc->getIndex()->getZExtValue();
>> -  llvm::Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Index);
>> -  llvm::Value *Count = Builder.CreateLoad(Addr, "pgocount");
>> +  Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
Index);
>> +  Value *Count = Builder.CreateLoad(Addr, "pgocount");
>>    Count = Builder.CreateAdd(Count, Builder.getInt64(1));
>>    Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr));
>>    Inc->eraseFromParent();
>> @@ -172,9 +255,10 @@
>>      GlobalVariable *Name = cast<GlobalVariable>(V);
>>      // If we have region counters for this name, we've already handled
>> it.
>> -    auto It = RegionCounters.find(Name);
>> -    if (It != RegionCounters.end())
>> -      continue;
>> +    auto It = ProfileDataMap.find(Name);
>> +    if (It != ProfileDataMap.end())
>> +      if (It->second.RegionCounters)
>> +        continue;
>>      // Move the name variable to the right section.
>>      Name->setSection(getNameSection());
>> @@ -192,9 +276,13 @@
>>  GlobalVariable *
>>  InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc)
>> {
>>    GlobalVariable *Name = Inc->getName();
>> -  auto It = RegionCounters.find(Name);
>> -  if (It != RegionCounters.end())
>> -    return It->second;
>> +  auto It = ProfileDataMap.find(Name);
>> +  PerFunctionProfileData PD;
>> +  if (It != ProfileDataMap.end()) {
>> +    if (It->second.RegionCounters)
>> +      return It->second.RegionCounters;
>> +    PD = It->second;
>> +  }
>>    // Move the name variable to the right section.
>>    Name->setSection(getNameSection());
>> @@ -212,30 +300,46 @@
>>    Counters->setSection(getCountersSection());
>>    Counters->setAlignment(8);
>> -  RegionCounters[Inc->getName()] = Counters;
>> -
>>    // Create data variable.
>>    auto *NameArrayTy = Name->getType()->getPointerElementType(); auto
*Int32Ty = Type::getInt32Ty(Ctx);
>>    auto *Int64Ty = Type::getInt64Ty(Ctx);
>>    auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
>>    auto *Int64PtrTy = Type::getInt64PtrTy(Ctx);
>> +  auto *Int32ArrayTy = ArrayType::get(Int32Ty,
>> instr_value_prof_kind::last);
>> +  auto *Int8PtrArrayTy = ArrayType::get(Int8PtrTy,
>> instr_value_prof_kind::last);
>> +
>> +  Constant *Int32ArrayVals[instr_value_prof_kind::last];
>> +  Constant *Int8PtrArrayVals[instr_value_prof_kind::last];
>> +  for (uint32_t Kind = instr_value_prof_kind::first;
>> +       Kind < instr_value_prof_kind::last; ++Kind) {
>> +    Int32ArrayVals[Kind] = ConstantInt::get(Int32Ty,
>> PD.NumValueSites[Kind]);
>> +    Int8PtrArrayVals[Kind] = ConstantPointerNull::get(Int8PtrTy); +  }
-  Type *DataTypes[] = {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy,
>> Int64PtrTy};
>> +  Type *DataTypes[] = {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy,
>> Int64PtrTy,
>> +    Int8PtrTy, Int32ArrayTy, Int8PtrArrayTy};
>>    auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
Constant *DataVals[] = {
>>        ConstantInt::get(Int32Ty, NameArrayTy->getArrayNumElements()),
ConstantInt::get(Int32Ty, NumCounters),
>>        ConstantInt::get(Int64Ty, Inc->getHash()->getZExtValue()),
ConstantExpr::getBitCast(Name, Int8PtrTy),
>> -      ConstantExpr::getBitCast(Counters, Int64PtrTy)};
>> -  auto *Data = new GlobalVariable(*M, DataTy, true,
Name->getLinkage(),
>> +      ConstantExpr::getBitCast(Counters, Int64PtrTy),
>> +      ConstantExpr::getBitCast(Inc->getParent()->getParent(),
>> Int8PtrTy),
>> +      ConstantArray::get(Int32ArrayTy, Int32ArrayVals),
>> +      ConstantArray::get(Int8PtrArrayTy,Int8PtrArrayVals)};
>> +  auto *Data = new GlobalVariable(*M, DataTy, false,
>> Name->getLinkage(),
>>                                    ConstantStruct::get(DataTy,
>> DataVals),
>>                                    getVarName(Inc, "data"));
>>    Data->setVisibility(Name->getVisibility());
>>    Data->setSection(getDataSection());
>>    Data->setAlignment(8);
>> +  PD.RegionCounters = Counters;
>> +  PD.DataVar = Data;
>> +  ProfileDataMap[Name] = PD;
>> +
>>    // Mark the data variable as used so that it isn't stripped out.
UsedVars.push_back(Data);
>> @@ -257,7 +361,7 @@
>>    if (Options.NoRedZone)
>>      RegisterF->addFnAttr(Attribute::NoRedZone);
>> -  auto *RuntimeRegisterTy = llvm::FunctionType::get(VoidTy, VoidPtrTy,
false);
>> +  auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy,
false);
> Please don't include unrelated cleanups - they make patch review harder.
This can happen separately.

Not addressed in the latest patch. I'll make sure to submit this change as
a separate CL.

>>    auto *RuntimeRegisterF =
>>        Function::Create(RuntimeRegisterTy,
>> GlobalVariable::ExternalLinkage,
>>                         "__llvm_profile_register_function", M);
>> @@ -317,13 +421,13 @@
>>    // Add uses for our data.
>>    for (auto *Value : UsedVars)
>>      MergedVars.push_back(
>> -        ConstantExpr::getBitCast(cast<llvm::Constant>(Value), i8PTy));
+        ConstantExpr::getBitCast(cast<Constant>(Value), i8PTy));
>>    // Recreate llvm.used.
>>    ArrayType *ATy = ArrayType::get(i8PTy, MergedVars.size());
>> -  LLVMUsed = new llvm::GlobalVariable(
>> -      *M, ATy, false, llvm::GlobalValue::AppendingLinkage,
>> -      llvm::ConstantArray::get(ATy, MergedVars), "llvm.used"); +
LLVMUsed = new GlobalVariable(
>> +      *M, ATy, false, GlobalValue::AppendingLinkage,
>> +      ConstantArray::get(ATy, MergedVars), "llvm.used");
>>    LLVMUsed->setSection("llvm.metadata");
>>  }
>> Index: test/Instrumentation/InstrProfiling/linkage.ll
>> =================================================================== ---
test/Instrumentation/InstrProfiling/linkage.ll
>> +++ test/Instrumentation/InstrProfiling/linkage.ll
>> @@ -9,28 +9,28 @@
>>  @__llvm_profile_name_foo_inline = linkonce_odr hidden constant [10 x
>> i8] c"foo_inline"
>>  ; CHECK: @__llvm_profile_counters_foo = hidden global
>> -; CHECK: @__llvm_profile_data_foo = hidden constant
>> +; CHECK: @__llvm_profile_data_foo = hidden
>>  define void @foo() {
>>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x
>> i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64 0, i32 1,
i32 0)
>>    ret void
>>  }
>>  ; CHECK: @__llvm_profile_counters_foo_weak = weak hidden global
>> -; CHECK: @__llvm_profile_data_foo_weak = weak hidden constant
>> +; CHECK: @__llvm_profile_data_foo_weak = weak hidden
>>  define weak void @foo_weak() {
>>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([8 x
>> i8], [8 x i8]* @__llvm_profile_name_foo_weak, i32 0, i32 0), i64 0, i32
1, i32 0)
>>    ret void
>>  }
>>  ; CHECK: @"__llvm_profile_counters_linkage.ll:foo_internal" = internal
>> global
>> -; CHECK: @"__llvm_profile_data_linkage.ll:foo_internal" = internal
constant
>> +; CHECK: @"__llvm_profile_data_linkage.ll:foo_internal" = internal
>>  define internal void @foo_internal() {
>>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([23
x
>> i8], [23 x i8]* @"__llvm_profile_name_linkage.ll:foo_internal", i32 0,
i32 0), i64 0, i32 1, i32 0)
>>    ret void
>>  }
>>  ; CHECK: @__llvm_profile_counters_foo_inline = linkonce_odr hidden
>> global
>> -; CHECK: @__llvm_profile_data_foo_inline = linkonce_odr hidden
constant
>> +; CHECK: @__llvm_profile_data_foo_inline = linkonce_odr hidden
>>  define linkonce_odr void @foo_inline() {
>>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([10
x
>> i8], [10 x i8]* @__llvm_profile_name_foo_inline, i32 0, i32 0), i64 0,
i32 1, i32 0)
>>    ret void
>> Index: test/Instrumentation/InstrProfiling/platform.ll
>> =================================================================== ---
test/Instrumentation/InstrProfiling/platform.ll
>> +++ test/Instrumentation/InstrProfiling/platform.ll
>> @@ -10,8 +10,8 @@
>>  ; MACHO: @__llvm_profile_counters_foo = hidden global [1 x i64]
>> zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
>>  ; ELF: @__llvm_profile_counters_foo = hidden global [1 x i64]
>> zeroinitializer, section "__llvm_prf_cnts", align 8
>> -; MACHO: @__llvm_profile_data_foo = hidden constant {{.*}}, section
"__DATA,__llvm_prf_data", align 8
>> -; ELF: @__llvm_profile_data_foo = hidden constant {{.*}}, section
"__llvm_prf_data", align 8
>> +; MACHO: @__llvm_profile_data_foo = hidden {{.*}}, section
>> "__DATA,__llvm_prf_data", align 8
>> +; ELF: @__llvm_profile_data_foo = hidden {{.*}}, section
>> "__llvm_prf_data", align 8
>>  define void @foo() {
>>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x
>> i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64 0, i32 1,
i32 0)
>>    ret void
>> Index: test/Instrumentation/InstrProfiling/profiling.ll
>> =================================================================== ---
test/Instrumentation/InstrProfiling/profiling.ll
>> +++ test/Instrumentation/InstrProfiling/profiling.ll
>> @@ -10,21 +10,21 @@
>>  ; CHECK: @baz_prof_name = hidden constant [3 x i8] c"baz", section
>> "__DATA,__llvm_prf_names", align 1
>>  ; CHECK: @__llvm_profile_counters_foo = hidden global [1 x i64]
>> zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
>> -; CHECK: @__llvm_profile_data_foo = hidden constant {{.*}}, section
"__DATA,__llvm_prf_data", align 8
>> +; CHECK: @__llvm_profile_data_foo = hidden {{.*}}, section
>> "__DATA,__llvm_prf_data", align 8
>>  define void @foo() {
>>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x
>> i8], [3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64 0, i32 1,
i32 0)
>>    ret void
>>  }
>>  ; CHECK: @__llvm_profile_counters_bar = hidden global [1 x i64]
>> zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
>> -; CHECK: @__llvm_profile_data_bar = hidden constant {{.*}}, section
"__DATA,__llvm_prf_data", align 8
>> +; CHECK: @__llvm_profile_data_bar = hidden {{.*}}, section
>> "__DATA,__llvm_prf_data", align 8
>>  define void @bar() {
>>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([4 x
>> i8], [4 x i8]* @__llvm_profile_name_bar, i32 0, i32 0), i64 0, i32 1,
i32 0)
>>    ret void
>>  }
>>  ; CHECK: @__llvm_profile_counters_baz = hidden global [3 x i64]
>> zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
>> -; CHECK: @__llvm_profile_data_baz = hidden constant {{.*}}, section
"__DATA,__llvm_prf_data", align 8
>> +; CHECK: @__llvm_profile_data_baz = hidden {{.*}}, section
>> "__DATA,__llvm_prf_data", align 8
>>  define void @baz() {
>>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x
>> i8], [3 x i8]* @baz_prof_name, i32 0, i32 0), i64 0, i32 3, i32 0)
>>    call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x
>> i8], [3 x i8]* @baz_prof_name, i32 0, i32 0), i64 0, i32 3, i32 1)
Index: tools/llvm-profdata/llvm-profdata.cpp
>> =================================================================== ---
tools/llvm-profdata/llvm-profdata.cpp
>> +++ tools/llvm-profdata/llvm-profdata.cpp
>> @@ -57,8 +57,7 @@
>>      auto Reader = std::move(ReaderOrErr.get());
>>      for (const auto &I : *Reader)
>> -      if (std::error_code EC =
>> -              Writer.addFunctionCounts(I.Name, I.Hash, I.Counts)) +
  if (std::error_code EC = Writer.addFunctionCounts(I))
>>          errs() << Filename << ": " << I.Name << ": " << EC.message()
<<
>> "\n";
>>      if (Reader->hasError())
>>        exitWithError(Reader->getError().message(), Filename);
>> @@ -132,8 +131,8 @@
>>  }
>>  static int showInstrProfile(std::string Filename, bool ShowCounts,
>> -                            bool ShowAllFunctions, std::string
ShowFunction,
>> -                            raw_fd_ostream &OS) {
>> +                            bool ShowIndirectCallTargets, bool
ShowAllFunctions,
>> +                            std::string ShowFunction, raw_fd_ostream
&OS) {
>>    auto ReaderOrErr = InstrProfReader::create(Filename);
>>    if (std::error_code EC = ReaderOrErr.getError())
>>      exitWithError(EC.message(), Filename);
>> @@ -160,6 +159,9 @@
>>           << "    Hash: " << format("0x%016" PRIx64, Func.Hash) << "\n"
<< "    Counters: " << Func.Counts.size() << "\n"
>>           << "    Function count: " << Func.Counts[0] << "\n";
>> +      if (ShowIndirectCallTargets)
>> +        OS << "    Indirect Target Data: "
>> +           <<
>> Func.Values[instr_value_prof_kind::indirect_call_target].size() <<
"\n";
>>      }
>>      if (Show && ShowCounts)
>> @@ -172,6 +174,15 @@
>>      }
>>      if (Show && ShowCounts)
>>        OS << "]\n";
>> +
>> +    if (Show && ShowIndirectCallTargets) {
>> +      OS << "    Indirect Target Results: \n";
>> +      for (size_t I = 0, E = Func.Values[0].size(); I < E; ++I) { +
    OS << "\t[ " << Func.Values[0][I].CounterIndex << ", "; +        OS
<< Func.Values[0][I].Name << ", ";
>> +        OS << Func.Values[0][I].NumTaken << " ]\n";
>> +      }
>> +    }
>>    }
>>    if (Reader->hasError())
>>      exitWithError(Reader->getError().message(), Filename);
>> @@ -208,6 +219,8 @@
>>    cl::opt<bool> ShowCounts("counts", cl::init(false),
>>                             cl::desc("Show counter values for shown
>> functions"));
>> +  cl::opt<bool> ShowIndirectCallTargets("ic-targets", cl::init(false),
+      cl::desc("Show indirect call site target values for shown
functions"));
>>    cl::opt<bool> ShowAllFunctions("all-functions", cl::init(false),
>>                                   cl::desc("Details for every
>> function"));
>>    cl::opt<std::string> ShowFunction("function",
>> @@ -236,8 +249,8 @@
>>      errs() << "warning: -function argument ignored: showing all
>> functions\n";
>>    if (ProfileKind == instr)
>> -    return showInstrProfile(Filename, ShowCounts, ShowAllFunctions, -
                          ShowFunction, OS);
>> +    return showInstrProfile(Filename, ShowCounts,
>> ShowIndirectCallTargets,
>> +                            ShowAllFunctions, ShowFunction, OS);
>>    else
>>      return showSampleProfile(Filename, ShowCounts, ShowAllFunctions,
>>                               ShowFunction, OS);
>> Index: unittests/ProfileData/CoverageMappingTest.cpp
>> =================================================================== ---
unittests/ProfileData/CoverageMappingTest.cpp
>> +++ unittests/ProfileData/CoverageMappingTest.cpp
>> @@ -188,7 +188,8 @@
>>  }
>>  TEST_F(CoverageMappingTest, basic_coverage_iteration) {
>> -  ProfileWriter.addFunctionCounts("func", 0x1234, {30, 20, 10, 0}); +
InstrProfRecord Record("func", 0x1234, {30, 20, 10, 0});
>> +  ProfileWriter.addFunctionCounts(Record);
>>    readProfCounts();
>>    addCMR(Counter::getCounter(0), "file1", 1, 1, 9, 9);
>> @@ -238,7 +239,8 @@
>>  }
>>  TEST_F(CoverageMappingTest, combine_regions) {
>> -  ProfileWriter.addFunctionCounts("func", 0x1234, {10, 20, 30}); +
InstrProfRecord Record("func", 0x1234, {10, 20, 30});
>> +  ProfileWriter.addFunctionCounts(Record);
>>    readProfCounts();
>>    addCMR(Counter::getCounter(0), "file1", 1, 1, 9, 9);
>> @@ -256,7 +258,8 @@
>>  }
>>  TEST_F(CoverageMappingTest, dont_combine_expansions) {
>> -  ProfileWriter.addFunctionCounts("func", 0x1234, {10, 20});
>> +  InstrProfRecord Record("func", 0x1234, {10, 20});
>> +  ProfileWriter.addFunctionCounts(Record);
>>    readProfCounts();
>>    addCMR(Counter::getCounter(0), "file1", 1, 1, 9, 9);
>> @@ -275,7 +278,8 @@
>>  }
>>  TEST_F(CoverageMappingTest, strip_filename_prefix) {
>> -  ProfileWriter.addFunctionCounts("file1:func", 0x1234, {10}); +
InstrProfRecord Record("file1:func", 0x1234, {10});
>> +  ProfileWriter.addFunctionCounts(Record);
>>    readProfCounts();
>>    addCMR(Counter::getCounter(0), "file1", 1, 1, 9, 9);
>> Index: unittests/ProfileData/InstrProfTest.cpp
>> =================================================================== ---
unittests/ProfileData/InstrProfTest.cpp
>> +++ unittests/ProfileData/InstrProfTest.cpp
>> @@ -50,7 +50,8 @@
>>  }
>>  TEST_F(InstrProfTest, write_and_read_one_function) {
>> -  Writer.addFunctionCounts("foo", 0x1234, {1, 2, 3, 4});
>> +  InstrProfRecord Record("foo", 0x1234, {1, 2, 3, 4});
>> +  Writer.addFunctionCounts(Record);
>>    auto Profile = Writer.writeBuffer();
>>    readProfile(std::move(Profile));
>> @@ -67,12 +68,14 @@
>>  }
>>  TEST_F(InstrProfTest, get_function_counts) {
>> -  Writer.addFunctionCounts("foo", 0x1234, {1, 2});
>> +  InstrProfRecord Record("foo", 0x1234, {1, 2});
>> +  Writer.addFunctionCounts(Record);
>>    auto Profile = Writer.writeBuffer();
>>    readProfile(std::move(Profile));
>>    std::vector<uint64_t> Counts;
>> -  ASSERT_TRUE(NoError(Reader->getFunctionCounts("foo", 0x1234,
Counts)));
>> +  ASSERT_TRUE(NoError(
>> +    Reader->getFunctionCounts("foo", 0x1234, Counts)));
>>    ASSERT_EQ(2U, Counts.size());
>>    ASSERT_EQ(1U, Counts[0]);
>>    ASSERT_EQ(2U, Counts[1]);
>> @@ -86,9 +89,12 @@
>>  }
>>  TEST_F(InstrProfTest, get_max_function_count) {
>> -  Writer.addFunctionCounts("foo", 0x1234, {1ULL << 31, 2});
>> -  Writer.addFunctionCounts("bar", 0, {1ULL << 63});
>> -  Writer.addFunctionCounts("baz", 0x5678, {0, 0, 0, 0});
>> +  InstrProfRecord Record1("foo", 0x1234, {1ULL << 31, 2});
>> +  InstrProfRecord Record2("bar", 0, {1ULL << 63});
>> +  InstrProfRecord Record3("baz", 0x5678, {0, 0, 0, 0});
>> +  Writer.addFunctionCounts(Record1);
>> +  Writer.addFunctionCounts(Record2);
>> +  Writer.addFunctionCounts(Record3);
>>    auto Profile = Writer.writeBuffer();
>>    readProfile(std::move(Profile));