[llvm] 0d362c9 - [memprof] Store callsite metadata with memprof records.

Snehasish Kumar via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 21 13:58:35 PDT 2022


Author: Snehasish Kumar
Date: 2022-03-21T13:58:29-07:00
New Revision: 0d362c90d335509c57c0fbd01ae1829e2b9c3765

URL: https://github.com/llvm/llvm-project/commit/0d362c90d335509c57c0fbd01ae1829e2b9c3765
DIFF: https://github.com/llvm/llvm-project/commit/0d362c90d335509c57c0fbd01ae1829e2b9c3765.diff

LOG: [memprof] Store callsite metadata with memprof records.

To ease profile annotation, each of the callsites in a function can be
annotated with profile data - "IR metadata format for MemProf" [1]. This
patch extends the on-disk serialized record format to store the debug
information for allocation callsites, including inline frames. This change
is incompatible with the existing format, i.e. indexed profiles must be
regenerated; raw profiles are unaffected.

[1] https://groups.google.com/g/llvm-dev/c/aWHsdMxKAfE/m/WtEmRqyhAgAJ

Reviewed By: tejohnson

Differential Revision: https://reviews.llvm.org/D121179

Added: 
    llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe

Modified: 
    llvm/include/llvm/ProfileData/InstrProfWriter.h
    llvm/include/llvm/ProfileData/MemProf.h
    llvm/include/llvm/ProfileData/RawMemProfReader.h
    llvm/lib/ProfileData/InstrProfWriter.cpp
    llvm/lib/ProfileData/MemProf.cpp
    llvm/lib/ProfileData/RawMemProfReader.cpp
    llvm/test/tools/llvm-profdata/memprof-basic.test
    llvm/test/tools/llvm-profdata/memprof-inline.test
    llvm/tools/llvm-profdata/llvm-profdata.cpp
    llvm/unittests/ProfileData/InstrProfTest.cpp
    llvm/unittests/ProfileData/MemProfTest.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index a8cfd380c4a19..bb37f41cddc86 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -15,7 +15,9 @@
 #define LLVM_PROFILEDATA_INSTRPROFWRITER_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/IR/GlobalValue.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/Support/Endian.h"
@@ -41,7 +43,7 @@ class InstrProfWriter {
 
   // A map to hold memprof data per function. The lower 64 bits obtained from
   // the md5 hash of the function name is used to index into the map.
-  memprof::FunctionMemProfMap MemProfData;
+  llvm::MapVector<GlobalValue::GUID, memprof::MemProfRecord> MemProfData;
 
   // An enum describing the attributes of the profile.
   InstrProfKind ProfileKind = InstrProfKind::Unknown;
@@ -63,7 +65,8 @@ class InstrProfWriter {
     addRecord(std::move(I), 1, Warn);
   }
 
-  void addRecord(const ::llvm::memprof::MemProfRecord &MR,
+  void addRecord(const GlobalValue::GUID Id,
+                 const memprof::MemProfRecord &Record,
                  function_ref<void(Error)> Warn);
 
   /// Merge existing function counts from the given writer.

diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 1436ea2a32938..3055b38ebe342 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -82,9 +82,9 @@ struct PortableMemInfoBlock {
 
   // Print out the contents of the MemInfoBlock in YAML format.
   void printYAML(raw_ostream &OS) const {
-    OS << "    MemInfoBlock:\n";
+    OS << "      MemInfoBlock:\n";
 #define MIBEntryDef(NameTag, Name, Type)                                       \
-  OS << "      " << #Name << ": " << Name << "\n";
+  OS << "        " << #Name << ": " << Name << "\n";
 #include "llvm/ProfileData/MIBEntryDef.inc"
 #undef MIBEntryDef
   }
@@ -133,6 +133,7 @@ struct PortableMemInfoBlock {
 #undef MIBEntryDef
 };
 
+// Holds the memprof profile information for a function.
 struct MemProfRecord {
   // Describes a call frame for a dynamic allocation context. The contents of
   // the frame are populated by symbolizing the stack depot call frame from the
@@ -193,64 +194,152 @@ struct MemProfRecord {
       return sizeof(Frame::Function) + sizeof(Frame::LineOffset) +
              sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame);
     }
+
+    // Print the frame information in YAML format.
+    void printYAML(raw_ostream &OS) const {
+      OS << "      -\n"
+         << "        Function: " << Function << "\n"
+         << "        LineOffset: " << LineOffset << "\n"
+         << "        Column: " << Column << "\n"
+         << "        Inline: " << IsInlineFrame << "\n";
+    }
   };
 
-  // The dynamic calling context for the allocation.
-  llvm::SmallVector<Frame> CallStack;
-  // The statistics obtained from the runtime for the allocation.
-  PortableMemInfoBlock Info;
+  struct AllocationInfo {
+    // The dynamic calling context for the allocation.
+    llvm::SmallVector<Frame> CallStack;
+    // The statistics obtained from the runtime for the allocation.
+    PortableMemInfoBlock Info;
+
+    AllocationInfo() = default;
+    AllocationInfo(ArrayRef<Frame> CS, const MemInfoBlock &MB)
+        : CallStack(CS.begin(), CS.end()), Info(MB) {}
+
+    void printYAML(raw_ostream &OS) const {
+      OS << "    -\n";
+      OS << "      Callstack:\n";
+      // TODO: Print out the frame on one line to make it easier for deep
+      // callstacks once we have a test to check valid YAML is generated.
+      for (const auto &Frame : CallStack)
+        Frame.printYAML(OS);
+      Info.printYAML(OS);
+    }
+
+    size_t serializedSize() const {
+      return sizeof(uint64_t) + // The number of frames to serialize.
+             Frame::serializedSize() *
+                 CallStack.size() + // The contents of the frames.
+             PortableMemInfoBlock::serializedSize(); // The size of the payload.
+    }
+
+    bool operator==(const AllocationInfo &Other) const {
+      if (Other.Info != Info)
+        return false;
+
+      if (Other.CallStack.size() != CallStack.size())
+        return false;
+
+      for (size_t J = 0; J < Other.CallStack.size(); J++) {
+        if (Other.CallStack[J] != CallStack[J])
+          return false;
+      }
+      return true;
+    }
+
+    bool operator!=(const AllocationInfo &Other) const {
+      return !operator==(Other);
+    }
+  };
+
+  // Memory allocation sites in this function for which we have memory profiling
+  // data.
+  llvm::SmallVector<AllocationInfo> AllocSites;
+  // Holds call sites in this function which are part of some memory allocation
+  // context. We store this as a list of locations, each with its list of
+  // inline locations in bottom-up order i.e. from leaf to root. The inline
+  // location list may include additional entries, users should pick the last
+  // entry in the list with the same function GUID.
+  llvm::SmallVector<llvm::SmallVector<Frame>> CallSites;
 
   void clear() {
-    CallStack.clear();
-    Info.clear();
+    AllocSites.clear();
+    CallSites.clear();
+  }
+
+  void merge(const MemProfRecord &Other) {
+    // TODO: Filter out duplicates which may occur if multiple memprof profiles
+    // are merged together using llvm-profdata.
+    AllocSites.append(Other.AllocSites);
+    CallSites.append(Other.CallSites);
   }
 
   size_t serializedSize() const {
-    return sizeof(uint64_t) + // The number of frames to serialize.
-           Frame::serializedSize() *
-               CallStack.size() + // The contents of the frames.
-           PortableMemInfoBlock::serializedSize(); // The size of the payload.
+    size_t Result = sizeof(GlobalValue::GUID);
+    for (const AllocationInfo &N : AllocSites)
+      Result += N.serializedSize();
+
+    // The number of callsites we have information for.
+    Result += sizeof(uint64_t);
+    for (const auto &Frames : CallSites) {
+      // The number of frames to serialize.
+      Result += sizeof(uint64_t);
+      for (const Frame &F : Frames)
+        Result += F.serializedSize();
+    }
+    return Result;
   }
 
   // Prints out the contents of the memprof record in YAML.
   void print(llvm::raw_ostream &OS) const {
-    OS << "    Callstack:\n";
-    // TODO: Print out the frame on one line with to make it easier for deep
-    // callstacks once we have a test to check valid YAML is generated.
-    for (const auto &Frame : CallStack) {
-      OS << "    -\n"
-         << "      Function: " << Frame.Function << "\n"
-         << "      LineOffset: " << Frame.LineOffset << "\n"
-         << "      Column: " << Frame.Column << "\n"
-         << "      Inline: " << Frame.IsInlineFrame << "\n";
+    if (!AllocSites.empty()) {
+      OS << "    AllocSites:\n";
+      for (const AllocationInfo &N : AllocSites)
+        N.printYAML(OS);
     }
 
-    Info.printYAML(OS);
+    if (!CallSites.empty()) {
+      OS << "    CallSites:\n";
+      for (const auto &Frames : CallSites) {
+        for (const auto &F : Frames) {
+          OS << "    -\n";
+          F.printYAML(OS);
+        }
+      }
+    }
   }
 
   bool operator==(const MemProfRecord &Other) const {
-    if (Other.Info != Info)
+    if (Other.AllocSites.size() != AllocSites.size())
       return false;
 
-    if (Other.CallStack.size() != CallStack.size())
+    if (Other.CallSites.size() != CallSites.size())
       return false;
 
-    for (size_t I = 0; I < Other.CallStack.size(); I++) {
-      if (Other.CallStack[I] != CallStack[I])
+    for (size_t I = 0; I < AllocSites.size(); I++) {
+      if (AllocSites[I] != Other.AllocSites[I])
+        return false;
+    }
+
+    for (size_t I = 0; I < CallSites.size(); I++) {
+      if (CallSites[I] != Other.CallSites[I])
         return false;
     }
     return true;
   }
-};
 
-// Serializes the memprof records in \p Records to the ostream \p OS based on
-// the schema provided in \p Schema.
-void serializeRecords(const ArrayRef<MemProfRecord> Records,
-                      const MemProfSchema &Schema, raw_ostream &OS);
+  // Serializes this memprof record to the ostream \p OS based on the schema
+  // provided in \p Schema.
+  void serialize(const MemProfSchema &Schema, raw_ostream &OS);
 
-// Deserializes memprof records from the Buffer
-SmallVector<MemProfRecord, 4> deserializeRecords(const MemProfSchema &Schema,
-                                                 const unsigned char *Buffer);
+  // Deserializes a memprof record from the Buffer.
+  static MemProfRecord deserialize(const MemProfSchema &Schema,
+                                   const unsigned char *Buffer);
+
+  // Returns the GUID for the function name after canonicalization. For memprof,
+  // we remove any .llvm suffix added by LTO. MemProfRecords are mapped to
+  // functions using this GUID.
+  static GlobalValue::GUID getGUID(const StringRef FunctionName);
+};
 
 // Reads a memprof schema from a buffer. All entries in the buffer are
 // interpreted as uint64_t. The first entry in the buffer denotes the number of
@@ -259,14 +348,11 @@ SmallVector<MemProfRecord, 4> deserializeRecords(const MemProfSchema &Schema,
 // byte past the schema contents.
 Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer);
 
-using FunctionMemProfMap =
-    DenseMap<uint64_t, SmallVector<memprof::MemProfRecord, 4>>;
-
 /// Trait for lookups into the on-disk hash table for memprof format in the
 /// indexed profile.
 class MemProfRecordLookupTrait {
 public:
-  using data_type = ArrayRef<MemProfRecord>;
+  using data_type = const MemProfRecord &;
   using internal_key_type = uint64_t;
   using external_key_type = uint64_t;
   using hash_value_type = uint64_t;
@@ -297,15 +383,15 @@ class MemProfRecordLookupTrait {
 
   data_type ReadData(uint64_t K, const unsigned char *D,
                      offset_type /*Unused*/) {
-    Records = deserializeRecords(Schema, D);
-    return Records;
+    Record = MemProfRecord::deserialize(Schema, D);
+    return Record;
   }
 
 private:
   // Holds the memprof schema used to deserialize records.
   MemProfSchema Schema;
   // Holds the records from one function deserialized from the indexed format.
-  llvm::SmallVector<MemProfRecord, 4> Records;
+  MemProfRecord Record;
 };
 
 class MemProfRecordWriterTrait {
@@ -313,8 +399,8 @@ class MemProfRecordWriterTrait {
   using key_type = uint64_t;
   using key_type_ref = uint64_t;
 
-  using data_type = ArrayRef<MemProfRecord>;
-  using data_type_ref = ArrayRef<MemProfRecord>;
+  using data_type = MemProfRecord;
+  using data_type_ref = MemProfRecord &;
 
   using hash_value_type = uint64_t;
   using offset_type = uint64_t;
@@ -333,17 +419,9 @@ class MemProfRecordWriterTrait {
     using namespace support;
 
     endian::Writer LE(Out, little);
-
     offset_type N = sizeof(K);
     LE.write<offset_type>(N);
-
-    offset_type M = 0;
-
-    M += sizeof(uint64_t);
-    for (const auto &Record : V) {
-      M += Record.serializedSize();
-    }
-
+    offset_type M = V.serializedSize();
     LE.write<offset_type>(M);
     return std::make_pair(N, M);
   }
@@ -357,7 +435,7 @@ class MemProfRecordWriterTrait {
   void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
                 offset_type /*Unused*/) {
     assert(Schema != nullptr && "MemProf schema is not initialized!");
-    serializeRecords(V, *Schema, Out);
+    V.serialize(*Schema, Out);
   }
 };
 

diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h
index 944d71386a4b2..872a71fd5cf56 100644
--- a/llvm/include/llvm/ProfileData/RawMemProfReader.h
+++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h
@@ -14,9 +14,11 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/IR/GlobalValue.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/ProfileData/InstrProfReader.h"
@@ -57,15 +59,16 @@ class RawMemProfReader {
   static Expected<std::unique_ptr<RawMemProfReader>>
   create(const Twine &Path, const StringRef ProfiledBinary);
 
-  Error readNextRecord(MemProfRecord &Record);
-
-  using Iterator = InstrProfIterator<MemProfRecord, RawMemProfReader>;
+  using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
+  using Iterator = InstrProfIterator<GuidMemProfRecordPair, RawMemProfReader>;
   Iterator end() { return Iterator(); }
   Iterator begin() {
-    Iter = ProfileData.begin();
+    Iter = FunctionProfileData.begin();
     return Iterator(this);
   }
 
+  Error readNextRecord(GuidMemProfRecordPair &GuidRecord);
+
   // The RawMemProfReader only holds memory profile information.
   InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; }
 
@@ -75,7 +78,7 @@ class RawMemProfReader {
                    llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
                    CallStackMap &SM)
       : Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()),
-        ProfileData(Prof), StackMap(SM) {
+        CallstackProfileData(Prof), StackMap(SM) {
     // We don't call initialize here since there is no raw profile to read. The
     // test should pass in the raw profile as structured data.
 
@@ -83,6 +86,8 @@ class RawMemProfReader {
     // initialized properly.
     if (Error E = symbolizeAndFilterStackFrames())
       report_fatal_error(std::move(E));
+    if (Error E = mapRawProfileToRecords())
+      report_fatal_error(std::move(E));
   }
 
 private:
@@ -96,10 +101,12 @@ class RawMemProfReader {
   // symbolize or those that belong to the runtime. For profile entries where
   // the entire callstack is pruned, we drop the entry from the profile.
   Error symbolizeAndFilterStackFrames();
+  // Construct memprof records for each function and store it in the
+  // `FunctionProfileData` map. A function may have allocation profile data or
+  // callsite data or both.
+  Error mapRawProfileToRecords();
 
   object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
-  Error fillRecord(const uint64_t Id, const MemInfoBlock &MIB,
-                   MemProfRecord &Record);
   // Prints aggregate counts for each raw profile parsed from the DataBuffer in
   // YAML format.
   void printSummaries(raw_ostream &OS) const;
@@ -112,15 +119,15 @@ class RawMemProfReader {
   llvm::SmallVector<SegmentEntry, 16> SegmentInfo;
   // A map from callstack id (same as key in CallStackMap below) to the heap
   // information recorded for that allocation context.
-  llvm::MapVector<uint64_t, MemInfoBlock> ProfileData;
+  llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData;
   CallStackMap StackMap;
 
   // Cached symbolization from PC to Frame.
   llvm::DenseMap<uint64_t, llvm::SmallVector<MemProfRecord::Frame>>
       SymbolizedFrame;
 
-  // Iterator to read from the ProfileData MapVector.
-  llvm::MapVector<uint64_t, MemInfoBlock>::iterator Iter = ProfileData.end();
+  llvm::MapVector<GlobalValue::GUID, MemProfRecord> FunctionProfileData;
+  llvm::MapVector<GlobalValue::GUID, MemProfRecord>::iterator Iter;
 };
 
 } // namespace memprof

diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 6c08f83be5dc2..494d563917678 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -253,28 +253,14 @@ void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
   Dest.sortValueData();
 }
 
-void InstrProfWriter::addRecord(const memprof::MemProfRecord &MR,
+void InstrProfWriter::addRecord(const Function::GUID Id,
+                                const memprof::MemProfRecord &Record,
                                 function_ref<void(Error)> Warn) {
-  // Use 0 as a sentinel value since its highly unlikely that the lower 64-bits
-  // of a 128 bit md5 hash will be all zeros.
-  // TODO: Move this Key frame detection to the contructor to avoid having to
-  // scan all the callstacks again when adding a new record.
-  uint64_t Key = 0;
-  for (auto Iter = MR.CallStack.rbegin(), End = MR.CallStack.rend();
-       Iter != End; Iter++) {
-    if (!Iter->IsInlineFrame) {
-      Key = Iter->Function;
-      break;
-    }
-  }
-
-  if (Key == 0) {
-    Warn(make_error<InstrProfError>(
-        instrprof_error::invalid_prof,
-        "could not determine leaf function for memprof record."));
+  auto Result = MemProfData.insert({Id, Record});
+  if (!Result.second) {
+    memprof::MemProfRecord &Existing = Result.first->second;
+    Existing.merge(Record);
   }
-
-  MemProfData[Key].push_back(MR);
 }
 
 void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
@@ -283,9 +269,9 @@ void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
     for (auto &Func : I.getValue())
       addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn);
 
-  for (auto &I : IPW.MemProfData)
-    for (const auto &MR : I.second)
-      addRecord(MR, Warn);
+  for (auto &I : IPW.MemProfData) {
+    addRecord(I.first, I.second, Warn);
+  }
 }
 
 bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
@@ -415,8 +401,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
     MemProfWriter->Schema = &Schema;
     OnDiskChainedHashTableGenerator<memprof::MemProfRecordWriterTrait>
         MemProfGenerator;
-    for (const auto &I : MemProfData) {
-      // Insert the key (func hash) and value (vector of memprof records).
+    for (auto &I : MemProfData) {
+      // Insert the key (func hash) and value (memprof record).
       MemProfGenerator.insert(I.first, I.second);
     }
 

diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index cdfc261256c65..92d759a88ea45 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -1,4 +1,6 @@
 #include "llvm/ProfileData/MemProf.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"
@@ -6,43 +8,76 @@
 namespace llvm {
 namespace memprof {
 
-void serializeRecords(const ArrayRef<MemProfRecord> Records,
-                      const MemProfSchema &Schema, raw_ostream &OS) {
+void MemProfRecord::serialize(const MemProfSchema &Schema, raw_ostream &OS) {
   using namespace support;
 
   endian::Writer LE(OS, little);
 
-  LE.write<uint64_t>(Records.size());
-  for (const MemProfRecord &MR : Records) {
-    LE.write<uint64_t>(MR.CallStack.size());
-    for (const MemProfRecord::Frame &F : MR.CallStack) {
+  LE.write<uint64_t>(AllocSites.size());
+  for (const AllocationInfo &N : AllocSites) {
+    LE.write<uint64_t>(N.CallStack.size());
+    for (const Frame &F : N.CallStack)
+      F.serialize(OS);
+    N.Info.serialize(Schema, OS);
+  }
+
+  // Related contexts.
+  LE.write<uint64_t>(CallSites.size());
+  for (const auto &Frames : CallSites) {
+    LE.write<uint64_t>(Frames.size());
+    for (const Frame &F : Frames)
       F.serialize(OS);
-    }
-    MR.Info.serialize(Schema, OS);
   }
 }
 
-SmallVector<MemProfRecord, 4> deserializeRecords(const MemProfSchema &Schema,
-                                                 const unsigned char *Ptr) {
+MemProfRecord MemProfRecord::deserialize(const MemProfSchema &Schema,
+                                         const unsigned char *Ptr) {
   using namespace support;
 
-  SmallVector<MemProfRecord, 4> Records;
-  const uint64_t NumRecords =
-      endian::readNext<uint64_t, little, unaligned>(Ptr);
-  for (uint64_t I = 0; I < NumRecords; I++) {
-    MemProfRecord MR;
+  MemProfRecord Record;
+
+  // Read the meminfo nodes.
+  const uint64_t NumNodes = endian::readNext<uint64_t, little, unaligned>(Ptr);
+  for (uint64_t I = 0; I < NumNodes; I++) {
+    MemProfRecord::AllocationInfo Node;
     const uint64_t NumFrames =
         endian::readNext<uint64_t, little, unaligned>(Ptr);
     for (uint64_t J = 0; J < NumFrames; J++) {
       const auto F = MemProfRecord::Frame::deserialize(Ptr);
       Ptr += MemProfRecord::Frame::serializedSize();
-      MR.CallStack.push_back(F);
+      Node.CallStack.push_back(F);
     }
-    MR.Info.deserialize(Schema, Ptr);
+    Node.Info.deserialize(Schema, Ptr);
     Ptr += PortableMemInfoBlock::serializedSize();
-    Records.push_back(MR);
+    Record.AllocSites.push_back(Node);
   }
-  return Records;
+
+  // Read the callsite information.
+  const uint64_t NumCtxs = endian::readNext<uint64_t, little, unaligned>(Ptr);
+  for (uint64_t J = 0; J < NumCtxs; J++) {
+    const uint64_t NumFrames =
+        endian::readNext<uint64_t, little, unaligned>(Ptr);
+    llvm::SmallVector<Frame> Frames;
+    for (uint64_t K = 0; K < NumFrames; K++) {
+      const auto F = MemProfRecord::Frame::deserialize(Ptr);
+      Ptr += MemProfRecord::Frame::serializedSize();
+      Frames.push_back(F);
+    }
+    Record.CallSites.push_back(Frames);
+  }
+
+  return Record;
+}
+
+GlobalValue::GUID MemProfRecord::getGUID(const StringRef FunctionName) {
+  const auto Pos = FunctionName.find(".llvm.");
+
+  // We use the function guid which we expect to be a uint64_t. At
+  // this time, it is the lower 64 bits of the md5 of the function
+  // name. Any suffix with .llvm. is trimmed since these are added by
+  // thinLTO global promotion. At the time the profile is consumed,
+  // these suffixes will not be present.
+  return Function::getGUID(FunctionName.take_front(Pos));
 }
 
 Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {

diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index a87410f6e5099..a5b7ea9e728c3 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -14,13 +14,13 @@
 #include <cstdint>
 #include <type_traits>
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
-#include "llvm/IR/Function.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/ObjectFile.h"
@@ -163,11 +163,6 @@ bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
   return false;
 }
 
-StringRef trimSuffix(const StringRef Name) {
-  const auto Pos = Name.find(".llvm.");
-  return Name.take_front(Pos);
-}
-
 Error report(Error E, const StringRef Context) {
   return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                     std::move(E));
@@ -233,9 +228,10 @@ void RawMemProfReader::printYAML(raw_ostream &OS) {
   printSummaries(OS);
   // Print out the merged contents of the profiles.
   OS << "  Records:\n";
-  for (const auto &Record : *this) {
+  for (const auto &Entry : *this) {
     OS << "  -\n";
-    Record.print(OS);
+    OS << "    FunctionGUID: " << Entry.first << "\n";
+    Entry.second.print(OS);
   }
 }
 
@@ -288,7 +284,90 @@ Error RawMemProfReader::initialize() {
   if (Error E = readRawProfile())
     return E;
 
-  return symbolizeAndFilterStackFrames();
+  if (Error E = symbolizeAndFilterStackFrames())
+    return E;
+
+  return mapRawProfileToRecords();
+}
+
+Error RawMemProfReader::mapRawProfileToRecords() {
+  // Hold a mapping from function to each callsite location we encounter within
+  // it that is part of some dynamic allocation context. The location is stored
+  // as a pointer to a symbolized list of inline frames.
+  using LocationPtr = const llvm::SmallVector<MemProfRecord::Frame> *;
+  llvm::DenseMap<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
+      PerFunctionCallSites;
+
+  // Convert the raw profile callstack data into memprof records. While doing so
+  // keep track of related contexts so that we can fill these in later.
+  for (const auto &Entry : CallstackProfileData) {
+    const uint64_t StackId = Entry.first;
+
+    auto It = StackMap.find(StackId);
+    if (It == StackMap.end())
+      return make_error<InstrProfError>(
+          instrprof_error::malformed,
+          "memprof callstack record does not contain id: " + Twine(StackId));
+
+    // Construct the symbolized callstack.
+    llvm::SmallVector<MemProfRecord::Frame> Callstack;
+    Callstack.reserve(It->getSecond().size());
+
+    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
+    for (size_t I = 0; I < Addresses.size(); I++) {
+      const uint64_t Address = Addresses[I];
+      assert(SymbolizedFrame.count(Address) > 0 &&
+             "Address not found in SymbolizedFrame map");
+      const SmallVector<MemProfRecord::Frame> &Frames =
+          SymbolizedFrame[Address];
+
+      assert(!Frames.back().IsInlineFrame &&
+             "The last frame should not be inlined");
+
+      // Record the callsites for each function. Skip the first frame of the
+      // first address since it is the allocation site itself that is recorded
+      // as an alloc site.
+      for (size_t J = 0; J < Frames.size(); J++) {
+        if (I == 0 && J == 0)
+          continue;
+        // We attach the entire bottom-up frame here for the callsite even
+        // though we only need the frames up to and including the frame for
+        // Frames[J].Function. This will enable better deduplication for
+        // compression in the future.
+        PerFunctionCallSites[Frames[J].Function].insert(&Frames);
+      }
+
+      // Add all the frames to the current allocation callstack.
+      Callstack.append(Frames.begin(), Frames.end());
+    }
+
+    // We attach the memprof record to each function bottom-up including the
+    // first non-inline frame.
+    for (size_t I = 0; /*Break out using the condition below*/; I++) {
+      auto Result =
+          FunctionProfileData.insert({Callstack[I].Function, MemProfRecord()});
+      MemProfRecord &Record = Result.first->second;
+      Record.AllocSites.emplace_back(Callstack, Entry.second);
+
+      if (!Callstack[I].IsInlineFrame)
+        break;
+    }
+  }
+
+  // Fill in the related callsites per function.
+  for (auto I = PerFunctionCallSites.begin(), E = PerFunctionCallSites.end();
+       I != E; I++) {
+    const GlobalValue::GUID Id = I->first;
+    // Some functions may have only callsite data and no allocation data. Here
+    // we insert a new entry for callsite data if we need to.
+    auto Result = FunctionProfileData.insert({Id, MemProfRecord()});
+    MemProfRecord &Record = Result.first->second;
+    for (LocationPtr Loc : I->getSecond()) {
+      Record.CallSites.push_back(*Loc);
+    }
+  }
+
+  return Error::success();
 }
 
 Error RawMemProfReader::symbolizeAndFilterStackFrames() {
@@ -331,15 +410,10 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames() {
         LLVM_DEBUG(
             // Print out the name to guid mapping for debugging.
             llvm::dbgs() << "FunctionName: " << Frame.FunctionName << " GUID: "
-                         << Function::getGUID(trimSuffix(Frame.FunctionName))
+                         << MemProfRecord::getGUID(Frame.FunctionName)
                          << "\n";);
         SymbolizedFrame[VAddr].emplace_back(
-            // We use the function guid which we expect to be a uint64_t. At
-            // this time, it is the lower 64 bits of the md5 of the function
-            // name. Any suffix with .llvm. is trimmed since these are added by
-            // thinLTO global promotion. At the time the profile is consumed,
-            // these suffixes will not be present.
-            Function::getGUID(trimSuffix(Frame.FunctionName)),
+            MemProfRecord::getGUID(Frame.FunctionName),
             Frame.Line - Frame.StartLine, Frame.Column,
             // Only the last entry is not an inlined location.
             I != NumFrames - 1);
@@ -359,7 +433,7 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames() {
   // Drop the entries where the callstack is empty.
   for (const uint64_t Id : EntriesToErase) {
     StackMap.erase(Id);
-    ProfileData.erase(Id);
+    CallstackProfileData.erase(Id);
   }
 
   if (StackMap.empty())
@@ -394,10 +468,10 @@ Error RawMemProfReader::readRawProfile() {
     // raw profiles in the same binary file are from the same process so the
     // stackdepot ids are the same.
     for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
-      if (ProfileData.count(Value.first)) {
-        ProfileData[Value.first].Merge(Value.second);
+      if (CallstackProfileData.count(Value.first)) {
+        CallstackProfileData[Value.first].Merge(Value.second);
       } else {
-        ProfileData[Value.first] = Value.second;
+        CallstackProfileData[Value.first] = Value.second;
       }
     }
 
@@ -438,29 +512,14 @@ RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
   return object::SectionedAddress{VirtualAddress};
 }
 
-Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB,
-                                   MemProfRecord &Record) {
-  auto &CallStack = StackMap[Id];
-  for (const uint64_t Address : CallStack) {
-    assert(SymbolizedFrame.count(Address) &&
-           "Address not found in symbolized frame cache.");
-    Record.CallStack.append(SymbolizedFrame[Address]);
-  }
-  Record.Info = PortableMemInfoBlock(MIB);
-  return Error::success();
-}
-
-Error RawMemProfReader::readNextRecord(MemProfRecord &Record) {
-  if (ProfileData.empty())
+Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
+  if (FunctionProfileData.empty())
     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
 
-  if (Iter == ProfileData.end())
+  if (Iter == FunctionProfileData.end())
     return make_error<InstrProfError>(instrprof_error::eof);
 
-  Record.clear();
-  if (Error E = fillRecord(Iter->first, Iter->second, Record)) {
-    return E;
-  }
+  GuidRecord = {Iter->first, Iter->second};
   Iter++;
   return Error::success();
 }

diff --git a/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe
new file mode 100755
index 0000000000000..9b6fd16e9a272
Binary files /dev/null and b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe differ

diff --git a/llvm/test/tools/llvm-profdata/memprof-basic.test b/llvm/test/tools/llvm-profdata/memprof-basic.test
index af22c3b6c39b3..e72728af101dd 100644
--- a/llvm/test/tools/llvm-profdata/memprof-basic.test
+++ b/llvm/test/tools/llvm-profdata/memprof-basic.test
@@ -26,7 +26,7 @@ recorded.
 ```
 clang -fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling \
       -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer \
-      -fno-optimize-sibling-calls -m64 -Wl,-build-id source.c -o basic.memprofexe 
+      -fno-optimize-sibling-calls -m64 -Wl,-build-id source.c -o basic.memprofexe
 
 env MEMPROF_OPTIONS=log_path=stdout ./rawprofile.out > basic.memprofraw
 ```
@@ -46,56 +46,59 @@ CHECK-NEXT:     NumMibInfo: 3
 CHECK-NEXT:     NumStackOffsets: 3
 CHECK-NEXT:   Records:
 CHECK-NEXT:   -
-CHECK-NEXT:     Callstack:
+CHECK-NEXT:     FunctionGUID: {{[0-9]+}}
+CHECK-NEXT:     AllocSites:
 CHECK-NEXT:     -
-CHECK-NEXT:       Function: {{[0-9]+}}
-CHECK-NEXT:       LineOffset: 1
-CHECK-NEXT:       Column: 21
-CHECK-NEXT:       Inline: 0
-CHECK-NEXT:     MemInfoBlock:
-CHECK-NEXT:       AllocCount: 1
-CHECK-NEXT:       TotalAccessCount: 2
-CHECK-NEXT:       MinAccessCount: 2
-CHECK-NEXT:       MaxAccessCount: 2
-CHECK-NEXT:       TotalSize: 10
-CHECK-NEXT:       MinSize: 10
-CHECK-NEXT:       MaxSize: 10
-CHECK-NEXT:       AllocTimestamp: 986
-CHECK-NEXT:       DeallocTimestamp: 986
-CHECK-NEXT:       TotalLifetime: 0
-CHECK-NEXT:       MinLifetime: 0
-CHECK-NEXT:       MaxLifetime: 0
-CHECK-NEXT:       AllocCpuId: 56
-CHECK-NEXT:       DeallocCpuId: 56
-CHECK-NEXT:       NumMigratedCpu: 0
-CHECK-NEXT:       NumLifetimeOverlaps: 0
-CHECK-NEXT:       NumSameAllocCpu: 0
-CHECK-NEXT:       NumSameDeallocCpu: 0
-CHECK-NEXT:       DataTypeId: {{[0-9]+}}
-CHECK-NEXT:   -
-CHECK-NEXT:     Callstack:
+CHECK-NEXT:       Callstack:
+CHECK-NEXT:       -
+CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         LineOffset: 1
+CHECK-NEXT:         Column: 21
+CHECK-NEXT:         Inline: 0
+CHECK-NEXT:       MemInfoBlock:
+CHECK-NEXT:         AllocCount: 1
+CHECK-NEXT:         TotalAccessCount: 2
+CHECK-NEXT:         MinAccessCount: 2
+CHECK-NEXT:         MaxAccessCount: 2
+CHECK-NEXT:         TotalSize: 10
+CHECK-NEXT:         MinSize: 10
+CHECK-NEXT:         MaxSize: 10
+CHECK-NEXT:         AllocTimestamp: 986
+CHECK-NEXT:         DeallocTimestamp: 986
+CHECK-NEXT:         TotalLifetime: 0
+CHECK-NEXT:         MinLifetime: 0
+CHECK-NEXT:         MaxLifetime: 0
+CHECK-NEXT:         AllocCpuId: 56
+CHECK-NEXT:         DeallocCpuId: 56
+CHECK-NEXT:         NumMigratedCpu: 0
+CHECK-NEXT:         NumLifetimeOverlaps: 0
+CHECK-NEXT:         NumSameAllocCpu: 0
+CHECK-NEXT:         NumSameDeallocCpu: 0
+CHECK-NEXT:         DataTypeId: {{[0-9]+}}
 CHECK-NEXT:     -
-CHECK-NEXT:       Function: {{[0-9]+}}
-CHECK-NEXT:       LineOffset: 5
-CHECK-NEXT:       Column: 15
-CHECK-NEXT:       Inline: 0
-CHECK-NEXT:     MemInfoBlock:
-CHECK-NEXT:       AllocCount: 1
-CHECK-NEXT:       TotalAccessCount: 2
-CHECK-NEXT:       MinAccessCount: 2
-CHECK-NEXT:       MaxAccessCount: 2
-CHECK-NEXT:       TotalSize: 10
-CHECK-NEXT:       MinSize: 10
-CHECK-NEXT:       MaxSize: 10
-CHECK-NEXT:       AllocTimestamp: 987
-CHECK-NEXT:       DeallocTimestamp: 987
-CHECK-NEXT:       TotalLifetime: 0
-CHECK-NEXT:       MinLifetime: 0
-CHECK-NEXT:       MaxLifetime: 0
-CHECK-NEXT:       AllocCpuId: 56
-CHECK-NEXT:       DeallocCpuId: 56
-CHECK-NEXT:       NumMigratedCpu: 0
-CHECK-NEXT:       NumLifetimeOverlaps: 0
-CHECK-NEXT:       NumSameAllocCpu: 0
-CHECK-NEXT:       NumSameDeallocCpu: 0
-CHECK-NEXT:       DataTypeId: {{[0-9]+}}
+CHECK-NEXT:       Callstack:
+CHECK-NEXT:       -
+CHECK-NEXT:         Function: {{[0-9]+}}
+CHECK-NEXT:         LineOffset: 5
+CHECK-NEXT:         Column: 15
+CHECK-NEXT:         Inline: 0
+CHECK-NEXT:       MemInfoBlock:
+CHECK-NEXT:         AllocCount: 1
+CHECK-NEXT:         TotalAccessCount: 2
+CHECK-NEXT:         MinAccessCount: 2
+CHECK-NEXT:         MaxAccessCount: 2
+CHECK-NEXT:         TotalSize: 10
+CHECK-NEXT:         MinSize: 10
+CHECK-NEXT:         MaxSize: 10
+CHECK-NEXT:         AllocTimestamp: 987
+CHECK-NEXT:         DeallocTimestamp: 987
+CHECK-NEXT:         TotalLifetime: 0
+CHECK-NEXT:         MinLifetime: 0
+CHECK-NEXT:         MaxLifetime: 0
+CHECK-NEXT:         AllocCpuId: 56
+CHECK-NEXT:         DeallocCpuId: 56
+CHECK-NEXT:         NumMigratedCpu: 0
+CHECK-NEXT:         NumLifetimeOverlaps: 0
+CHECK-NEXT:         NumSameAllocCpu: 0
+CHECK-NEXT:         NumSameDeallocCpu: 0
+CHECK-NEXT:         DataTypeId: {{[0-9]+}}

diff  --git a/llvm/test/tools/llvm-profdata/memprof-inline.test b/llvm/test/tools/llvm-profdata/memprof-inline.test
index ecf4f178a9743..a31903e120c72 100644
--- a/llvm/test/tools/llvm-profdata/memprof-inline.test
+++ b/llvm/test/tools/llvm-profdata/memprof-inline.test
@@ -50,7 +50,7 @@ FunctionName: main GUID: 15822663052811949562
 [..omit output here which is checked below..]
 ```
 
-RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary %p/Inputs/memprof-inline.exe
+RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary %p/Inputs/inline.memprofexe | FileCheck %s
 
 CHECK:  MemprofProfile:
 CHECK-NEXT:  -
@@ -62,45 +62,123 @@ CHECK-NEXT:    NumMibInfo: 2
 CHECK-NEXT:    NumStackOffsets: 2
 CHECK-NEXT:  Records:
 CHECK-NEXT:  -
-CHECK-NEXT:    Callstack:
+CHECK-NEXT:    FunctionGUID: 15505678318020221912
+CHECK-NEXT:    AllocSites:
 CHECK-NEXT:    -
-CHECK-NEXT:      Function: 15505678318020221912
-CHECK-NEXT:      LineOffset: 1
-CHECK-NEXT:      Column: 15
-CHECK-NEXT:      Inline: 0
+CHECK-NEXT:      Callstack:
+CHECK-NEXT:      -
+CHECK-NEXT:        Function: 15505678318020221912
+CHECK-NEXT:        LineOffset: 1
+CHECK-NEXT:        Column: 15
+CHECK-NEXT:        Inline: 1
+CHECK-NEXT:      -
+CHECK-NEXT:        Function: 6699318081062747564
+CHECK-NEXT:        LineOffset: 0
+CHECK-NEXT:        Column: 18
+CHECK-NEXT:        Inline: 0
+CHECK-NEXT:      -
+CHECK-NEXT:        Function: 16434608426314478903
+CHECK-NEXT:        LineOffset: 0
+CHECK-NEXT:        Column: 19
+CHECK-NEXT:        Inline: 0
+CHECK-NEXT:      -
+CHECK-NEXT:        Function: 15822663052811949562
+CHECK-NEXT:        LineOffset: 1
+CHECK-NEXT:        Column: 3
+CHECK-NEXT:        Inline: 0
+CHECK-NEXT:      MemInfoBlock:
+CHECK-NEXT:        AllocCount: 1
+CHECK-NEXT:        TotalAccessCount: 1
+CHECK-NEXT:        MinAccessCount: 1
+CHECK-NEXT:        MaxAccessCount: 1
+CHECK-NEXT:        TotalSize: 1
+CHECK-NEXT:        MinSize: 1
+CHECK-NEXT:        MaxSize: 1
+CHECK-NEXT:        AllocTimestamp: 894
+CHECK-NEXT:        DeallocTimestamp: 894
+CHECK-NEXT:        TotalLifetime: 0
+CHECK-NEXT:        MinLifetime: 0
+CHECK-NEXT:        MaxLifetime: 0
+CHECK-NEXT:        AllocCpuId: 23
+CHECK-NEXT:        DeallocCpuId: 23
+CHECK-NEXT:        NumMigratedCpu: 0
+CHECK-NEXT:        NumLifetimeOverlaps: 0
+CHECK-NEXT:        NumSameAllocCpu: 0
+CHECK-NEXT:        NumSameDeallocCpu: 0
+CHECK-NEXT:        DataTypeId: {{[0-9]+}}
+CHECK-NEXT:  -
+CHECK-NEXT:    FunctionGUID: 6699318081062747564
+CHECK-NEXT:    AllocSites:
 CHECK-NEXT:    -
-CHECK-NEXT:      Function: 6699318081062747564
-CHECK-NEXT:      LineOffset: 0
-CHECK-NEXT:      Column: 18
-CHECK-NEXT:      Inline: 1
+CHECK-NEXT:      Callstack:
+CHECK-NEXT:      -
+CHECK-NEXT:        Function: 15505678318020221912
+CHECK-NEXT:        LineOffset: 1
+CHECK-NEXT:        Column: 15
+CHECK-NEXT:        Inline: 1
+CHECK-NEXT:      -
+CHECK-NEXT:        Function: 6699318081062747564
+CHECK-NEXT:        LineOffset: 0
+CHECK-NEXT:        Column: 18
+CHECK-NEXT:        Inline: 0
+CHECK-NEXT:      -
+CHECK-NEXT:        Function: 16434608426314478903
+CHECK-NEXT:        LineOffset: 0
+CHECK-NEXT:        Column: 19
+CHECK-NEXT:        Inline: 0
+CHECK-NEXT:      -
+CHECK-NEXT:        Function: 15822663052811949562
+CHECK-NEXT:        LineOffset: 1
+CHECK-NEXT:        Column: 3
+CHECK-NEXT:        Inline: 0
+CHECK-NEXT:      MemInfoBlock:
+CHECK-NEXT:        AllocCount: 1
+CHECK-NEXT:        TotalAccessCount: 1
+CHECK-NEXT:        MinAccessCount: 1
+CHECK-NEXT:        MaxAccessCount: 1
+CHECK-NEXT:        TotalSize: 1
+CHECK-NEXT:        MinSize: 1
+CHECK-NEXT:        MaxSize: 1
+CHECK-NEXT:        AllocTimestamp: 894
+CHECK-NEXT:        DeallocTimestamp: 894
+CHECK-NEXT:        TotalLifetime: 0
+CHECK-NEXT:        MinLifetime: 0
+CHECK-NEXT:        MaxLifetime: 0
+CHECK-NEXT:        AllocCpuId: 23
+CHECK-NEXT:        DeallocCpuId: 23
+CHECK-NEXT:        NumMigratedCpu: 0
+CHECK-NEXT:        NumLifetimeOverlaps: 0
+CHECK-NEXT:        NumSameAllocCpu: 0
+CHECK-NEXT:        NumSameDeallocCpu: 0
+CHECK-NEXT:        DataTypeId: {{[0-9]+}}
+CHECK-NEXT:    CallSites:
 CHECK-NEXT:    -
-CHECK-NEXT:      Function: 16434608426314478903
-CHECK-NEXT:      LineOffset: 0
-CHECK-NEXT:      Column: 19
-CHECK-NEXT:      Inline: 0
+CHECK-NEXT:      -
+CHECK-NEXT:        Function: 15505678318020221912
+CHECK-NEXT:        LineOffset: 1
+CHECK-NEXT:        Column: 15
+CHECK-NEXT:        Inline: 1
 CHECK-NEXT:    -
-CHECK-NEXT:      Function: 15822663052811949562
-CHECK-NEXT:      LineOffset: 1
-CHECK-NEXT:      Column: 3
-CHECK-NEXT:      Inline: 0
-CHECK-NEXT:    MemInfoBlock:
-CHECK-NEXT:      AllocCount: 1
-CHECK-NEXT:      TotalAccessCount: 1
-CHECK-NEXT:      MinAccessCount: 1
-CHECK-NEXT:      MaxAccessCount: 1
-CHECK-NEXT:      TotalSize: 1
-CHECK-NEXT:      MinSize: 1
-CHECK-NEXT:      MaxSize: 1
-CHECK-NEXT:      AllocTimestamp: 894
-CHECK-NEXT:      DeallocTimestamp: 894
-CHECK-NEXT:      TotalLifetime: 0
-CHECK-NEXT:      MinLifetime: 0
-CHECK-NEXT:      MaxLifetime: 0
-CHECK-NEXT:      AllocCpuId: 23
-CHECK-NEXT:      DeallocCpuId: 23
-CHECK-NEXT:      NumMigratedCpu: 0
-CHECK-NEXT:      NumLifetimeOverlaps: 0
-CHECK-NEXT:      NumSameAllocCpu: 0
-CHECK-NEXT:      NumSameDeallocCpu: 0
-CHECK-NEXT:      DataTypeId: {{[0-9]+}}
-
+CHECK-NEXT:      -
+CHECK-NEXT:        Function: 6699318081062747564
+CHECK-NEXT:        LineOffset: 0
+CHECK-NEXT:        Column: 18
+CHECK-NEXT:        Inline: 0
+CHECK-NEXT:  -
+CHECK-NEXT:    FunctionGUID: 15822663052811949562
+CHECK-NEXT:    CallSites:
+CHECK-NEXT:    -
+CHECK-NEXT:      -
+CHECK-NEXT:        Function: 15822663052811949562
+CHECK-NEXT:        LineOffset: 1
+CHECK-NEXT:        Column: 3
+CHECK-NEXT:        Inline: 0
+CHECK-NEXT:  -
+CHECK-NEXT:    FunctionGUID: 16434608426314478903
+CHECK-NEXT:    CallSites:
+CHECK-NEXT:    -
+CHECK-NEXT:      -
+CHECK-NEXT:        Function: 16434608426314478903
+CHECK-NEXT:        LineOffset: 0
+CHECK-NEXT:        Column: 19
+CHECK-NEXT:        Inline: 0

diff  --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index ba2f1b6038c48..df56a76b1552e 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -267,8 +267,8 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
     }
 
     // Add the records into the writer context.
-    for (const memprof::MemProfRecord &MR : *Reader) {
-      WC->Writer.addRecord(MR, [&](Error E) {
+    for (auto I = Reader->begin(), E = Reader->end(); I != E; ++I) {
+      WC->Writer.addRecord(/*Id=*/I->first, /*Record=*/I->second, [&](Error E) {
         instrprof_error IPE = InstrProfError::take(std::move(E));
         WC->Errors.emplace_back(make_error<InstrProfError>(IPE), Filename);
       });

diff  --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 434e6aaee8b02..422492266797e 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -13,6 +13,7 @@
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ProfileData/InstrProfWriter.h"
 #include "llvm/ProfileData/MemProf.h"
+#include "llvm/ProfileData/MemProfData.inc"
 #include "llvm/Support/Compression.h"
 #include "llvm/Testing/Support/Error.h"
 #include "llvm/Testing/Support/SupportHelpers.h"
@@ -222,18 +223,41 @@ TEST_F(InstrProfTest, test_writer_merge) {
   ASSERT_EQ(0U, R->Counts[1]);
 }
 
+using ::llvm::memprof::MemInfoBlock;
+using ::llvm::memprof::MemProfRecord;
+MemProfRecord
+makeRecord(std::initializer_list<std::initializer_list<MemProfRecord::Frame>>
+               AllocFrames,
+           std::initializer_list<std::initializer_list<MemProfRecord::Frame>>
+               CallSiteFrames,
+           const MemInfoBlock &Block = MemInfoBlock()) {
+  llvm::memprof::MemProfRecord MR;
+  for (const auto &Frames : AllocFrames)
+    MR.AllocSites.emplace_back(Frames, Block);
+  for (const auto &Frames : CallSiteFrames)
+    MR.CallSites.push_back(Frames);
+  return MR;
+}
+
 TEST_F(InstrProfTest, test_memprof) {
   ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
                     Succeeded());
-  llvm::memprof::MemProfRecord MR;
-  MR.CallStack.push_back({0x123, 1, 2, false});
-  MR.CallStack.push_back({0x345, 3, 4, true});
-  Writer.addRecord(MR, Err);
+
+  const MemProfRecord MR = makeRecord(
+      /*AllocFrames=*/
+      {
+          {{0x123, 1, 2, false}, {0x345, 3, 4, true}},
+          {{0x125, 5, 6, false}, {0x567, 7, 8, true}},
+      },
+      /*CallSiteFrames=*/{
+          {{0x124, 5, 6, false}, {0x789, 8, 9, true}},
+      });
+  Writer.addRecord(/*Id=*/0x9999, MR, Err);
 
   auto Profile = Writer.writeBuffer();
   readProfile(std::move(Profile));
 
-  auto RecordsOr = Reader->getMemProfRecord(0x123);
+  auto RecordsOr = Reader->getMemProfRecord(0x9999);
   ASSERT_THAT_ERROR(RecordsOr.takeError(), Succeeded());
   const auto Records = RecordsOr.get();
   ASSERT_EQ(Records.size(), 1U);
@@ -247,10 +271,16 @@ TEST_F(InstrProfTest, test_memprof_merge) {
   ASSERT_THAT_ERROR(Writer2.mergeProfileKind(InstrProfKind::MemProf),
                     Succeeded());
 
-  llvm::memprof::MemProfRecord MR;
-  MR.CallStack.push_back({0x123, 1, 2, false});
-  MR.CallStack.push_back({0x345, 3, 4, true});
-  Writer2.addRecord(MR, Err);
+  const MemProfRecord MR = makeRecord(
+      /*AllocFrames=*/
+      {
+          {{0x123, 1, 2, false}, {0x345, 3, 4, true}},
+          {{0x125, 5, 6, false}, {0x567, 7, 8, true}},
+      },
+      /*CallSiteFrames=*/{
+          {{0x124, 5, 6, false}, {0x789, 8, 9, true}},
+      });
+  Writer2.addRecord(/*Id=*/0x9999, MR, Err);
 
   ASSERT_THAT_ERROR(Writer.mergeProfileKind(Writer2.getProfileKind()),
                     Succeeded());
@@ -264,25 +294,13 @@ TEST_F(InstrProfTest, test_memprof_merge) {
   ASSERT_EQ(1U, R->Counts.size());
   ASSERT_EQ(42U, R->Counts[0]);
 
-  auto RecordsOr = Reader->getMemProfRecord(0x123);
+  auto RecordsOr = Reader->getMemProfRecord(0x9999);
   ASSERT_THAT_ERROR(RecordsOr.takeError(), Succeeded());
   const auto Records = RecordsOr.get();
   ASSERT_EQ(Records.size(), 1U);
   EXPECT_EQ(Records[0], MR);
 }
 
-TEST_F(InstrProfTest, test_memprof_invalid_add_record) {
-  llvm::memprof::MemProfRecord MR;
-  // At least one of the frames should be a non-inline frame.
-  MR.CallStack.push_back({0x123, 1, 2, true});
-  MR.CallStack.push_back({0x345, 3, 4, true});
-
-  auto CheckErr = [](Error &&E) {
-    EXPECT_TRUE(ErrorEquals(instrprof_error::invalid_prof, std::move(E)));
-  };
-  Writer.addRecord(MR, CheckErr);
-}
-
 static const char callee1[] = "callee1";
 static const char callee2[] = "callee2";
 static const char callee3[] = "callee3";

diff  --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 8921235d063fb..7f7cd64f54065 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -4,6 +4,7 @@
 #include "llvm/DebugInfo/DIContext.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/MemProfData.inc"
@@ -133,6 +134,13 @@ MemProfSchema getFullSchema() {
 TEST(MemProf, FillsValue) {
   std::unique_ptr<MockSymbolizer> Symbolizer(new MockSymbolizer());
 
+  EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x1000},
+                                                specifier(), false))
+      .Times(1) // Only once since we remember invalid PCs.
+      .WillRepeatedly(Return(makeInliningInfo({
+          {"new", 70, 57, 3, "memprof/memprof_new_delete.cpp"},
+      })));
+
   EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x2000},
                                                 specifier(), false))
       .Times(1) // Only once since we cache the result for future lookups.
@@ -141,41 +149,98 @@ TEST(MemProf, FillsValue) {
           {"bar", 201, 150, 20},
       })));
 
-  EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x6000},
+  EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x3000},
                                                 specifier(), false))
       .Times(1)
       .WillRepeatedly(Return(makeInliningInfo({
-          {"baz", 10, 5, 30},
-          {"qux.llvm.12345", 75, 70, 10},
+          {"xyz", 10, 5, 30},
+          {"abc", 10, 5, 30},
       })));
 
   CallStackMap CSM;
-  CSM[0x1] = {0x2000};
-  CSM[0x2] = {0x6000, 0x2000};
+  CSM[0x1] = {0x1000, 0x2000, 0x3000};
 
   llvm::MapVector<uint64_t, MemInfoBlock> Prof;
   Prof[0x1].AllocCount = 1;
-  Prof[0x2].AllocCount = 2;
 
   auto Seg = makeSegments();
 
   RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM);
 
-  std::vector<MemProfRecord> Records;
-  for (const MemProfRecord &R : Reader) {
-    Records.push_back(R);
+  llvm::DenseMap<llvm::GlobalValue::GUID, MemProfRecord> Records;
+  for (const auto &Pair : Reader) {
+    Records.insert({Pair.first, Pair.second});
   }
-  EXPECT_EQ(Records.size(), 2U);
-
-  EXPECT_EQ(Records[0].Info.getAllocCount(), 1U);
-  EXPECT_EQ(Records[1].Info.getAllocCount(), 2U);
-  EXPECT_THAT(Records[0].CallStack[0], FrameContains("foo", 5U, 30U, true));
-  EXPECT_THAT(Records[0].CallStack[1], FrameContains("bar", 51U, 20U, false));
 
-  EXPECT_THAT(Records[1].CallStack[0], FrameContains("baz", 5U, 30U, true));
-  EXPECT_THAT(Records[1].CallStack[1], FrameContains("qux", 5U, 10U, false));
-  EXPECT_THAT(Records[1].CallStack[2], FrameContains("foo", 5U, 30U, true));
-  EXPECT_THAT(Records[1].CallStack[3], FrameContains("bar", 51U, 20U, false));
+  // Mock program pseudocode and expected memprof record contents.
+  //
+  //                              AllocSite       CallSite
+  // inline foo() { new(); }         Y               N
+  // bar() { foo(); }                Y               Y
+  // inline xyz() { bar(); }         N               Y
+  // abc() { xyz(); }                N               Y
+
+  // We expect 4 records. We attach alloc site data to foo and bar, i.e.
+  // all frames bottom up until we find a non-inline frame. We attach call site
+  // data to bar, xyz and abc.
+  ASSERT_EQ(Records.size(), 4U);
+
+  // Check the memprof record for foo.
+  const llvm::GlobalValue::GUID FooId = MemProfRecord::getGUID("foo");
+  ASSERT_EQ(Records.count(FooId), 1U);
+  const MemProfRecord &Foo = Records[FooId];
+  ASSERT_EQ(Foo.AllocSites.size(), 1U);
+  EXPECT_EQ(Foo.AllocSites[0].Info.getAllocCount(), 1U);
+  EXPECT_THAT(Foo.AllocSites[0].CallStack[0],
+              FrameContains("foo", 5U, 30U, true));
+  EXPECT_THAT(Foo.AllocSites[0].CallStack[1],
+              FrameContains("bar", 51U, 20U, false));
+  EXPECT_THAT(Foo.AllocSites[0].CallStack[2],
+              FrameContains("xyz", 5U, 30U, true));
+  EXPECT_THAT(Foo.AllocSites[0].CallStack[3],
+              FrameContains("abc", 5U, 30U, false));
+  EXPECT_TRUE(Foo.CallSites.empty());
+
+  // Check the memprof record for bar.
+  const llvm::GlobalValue::GUID BarId = MemProfRecord::getGUID("bar");
+  ASSERT_EQ(Records.count(BarId), 1U);
+  const MemProfRecord &Bar = Records[BarId];
+  ASSERT_EQ(Bar.AllocSites.size(), 1U);
+  EXPECT_EQ(Bar.AllocSites[0].Info.getAllocCount(), 1U);
+  EXPECT_THAT(Bar.AllocSites[0].CallStack[0],
+              FrameContains("foo", 5U, 30U, true));
+  EXPECT_THAT(Bar.AllocSites[0].CallStack[1],
+              FrameContains("bar", 51U, 20U, false));
+  EXPECT_THAT(Bar.AllocSites[0].CallStack[2],
+              FrameContains("xyz", 5U, 30U, true));
+  EXPECT_THAT(Bar.AllocSites[0].CallStack[3],
+              FrameContains("abc", 5U, 30U, false));
+
+  ASSERT_EQ(Bar.CallSites.size(), 1U);
+  ASSERT_EQ(Bar.CallSites[0].size(), 2U);
+  EXPECT_THAT(Bar.CallSites[0][0], FrameContains("foo", 5U, 30U, true));
+  EXPECT_THAT(Bar.CallSites[0][1], FrameContains("bar", 51U, 20U, false));
+
+  // Check the memprof record for xyz.
+  const llvm::GlobalValue::GUID XyzId = MemProfRecord::getGUID("xyz");
+  ASSERT_EQ(Records.count(XyzId), 1U);
+  const MemProfRecord &Xyz = Records[XyzId];
+  ASSERT_EQ(Xyz.CallSites.size(), 1U);
+  ASSERT_EQ(Xyz.CallSites[0].size(), 2U);
+  // Expect the entire frame even though in practice we only need the first
+  // entry here.
+  EXPECT_THAT(Xyz.CallSites[0][0], FrameContains("xyz", 5U, 30U, true));
+  EXPECT_THAT(Xyz.CallSites[0][1], FrameContains("abc", 5U, 30U, false));
+
+  // Check the memprof record for abc.
+  const llvm::GlobalValue::GUID AbcId = MemProfRecord::getGUID("abc");
+  ASSERT_EQ(Records.count(AbcId), 1U);
+  const MemProfRecord &Abc = Records[AbcId];
+  EXPECT_TRUE(Abc.AllocSites.empty());
+  ASSERT_EQ(Abc.CallSites.size(), 1U);
+  ASSERT_EQ(Abc.CallSites[0].size(), 2U);
+  EXPECT_THAT(Abc.CallSites[0][0], FrameContains("xyz", 5U, 30U, true));
+  EXPECT_THAT(Abc.CallSites[0][1], FrameContains("abc", 5U, 30U, false));
 }
 
 TEST(MemProf, PortableWrapper) {
@@ -206,36 +271,33 @@ TEST(MemProf, PortableWrapper) {
 TEST(MemProf, RecordSerializationRoundTrip) {
   const MemProfSchema Schema = getFullSchema();
 
-  llvm::SmallVector<MemProfRecord, 3> Records;
-  MemProfRecord MR;
-
   MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
                     /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3,
                     /*dealloc_cpu=*/4);
 
-  MR.Info = PortableMemInfoBlock(Info);
-  MR.CallStack.push_back({0x123, 1, 2, false});
-  MR.CallStack.push_back({0x345, 3, 4, false});
-  Records.push_back(MR);
+  llvm::SmallVector<llvm::SmallVector<MemProfRecord::Frame>> AllocCallStacks = {
+      {{0x123, 1, 2, false}, {0x345, 3, 4, false}},
+      {{0x123, 1, 2, false}, {0x567, 5, 6, false}}};
 
-  MR.clear();
-  MR.Info = PortableMemInfoBlock(Info);
-  MR.CallStack.push_back({0x567, 5, 6, false});
-  MR.CallStack.push_back({0x789, 7, 8, false});
-  Records.push_back(MR);
+  llvm::SmallVector<llvm::SmallVector<MemProfRecord::Frame>> CallSites = {
+      {{0x333, 1, 2, false}, {0x777, 3, 4, true}}};
+
+  MemProfRecord Record;
+  for (const auto &ACS : AllocCallStacks) {
+    // Use the same info block for both allocation sites.
+    Record.AllocSites.emplace_back(ACS, Info);
+  }
+  Record.CallSites.assign(CallSites);
 
   std::string Buffer;
   llvm::raw_string_ostream OS(Buffer);
-  serializeRecords(Records, Schema, OS);
+  Record.serialize(Schema, OS);
   OS.flush();
 
-  const llvm::SmallVector<MemProfRecord, 4> GotRecords = deserializeRecords(
+  const MemProfRecord GotRecord = MemProfRecord::deserialize(
       Schema, reinterpret_cast<const unsigned char *>(Buffer.data()));
 
-  ASSERT_TRUE(!GotRecords.empty());
-  EXPECT_EQ(GotRecords.size(), Records.size());
-  EXPECT_THAT(GotRecords[0], EqualsRecord(Records[0]));
-  EXPECT_THAT(GotRecords[1], EqualsRecord(Records[1]));
+  EXPECT_THAT(GotRecord, EqualsRecord(Record));
 }
 
 TEST(MemProf, SymbolizationFilter) {
@@ -283,12 +345,15 @@ TEST(MemProf, SymbolizationFilter) {
 
   RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM);
 
-  std::vector<MemProfRecord> Records;
-  for (const MemProfRecord &R : Reader) {
-    Records.push_back(R);
+  llvm::SmallVector<MemProfRecord, 1> Records;
+  for (const auto &KeyRecordPair : Reader) {
+    Records.push_back(KeyRecordPair.second);
   }
+
   ASSERT_EQ(Records.size(), 1U);
-  ASSERT_EQ(Records[0].CallStack.size(), 1U);
-  EXPECT_THAT(Records[0].CallStack[0], FrameContains("foo", 5U, 30U, false));
+  ASSERT_EQ(Records[0].AllocSites.size(), 1U);
+  ASSERT_EQ(Records[0].AllocSites[0].CallStack.size(), 1U);
+  EXPECT_THAT(Records[0].AllocSites[0].CallStack[0],
+              FrameContains("foo", 5U, 30U, false));
 }
 } // namespace


        


More information about the llvm-commits mailing list