[llvm] 216575e - Revert "Revert "[ProfileData] Read and symbolize raw memprof profiles.""

Snehasish Kumar via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 8 13:37:42 PST 2022


Author: Snehasish Kumar
Date: 2022-02-08T13:37:27-08:00
New Revision: 216575e581023f6bac9bb81fbfb5b626faf93f23

URL: https://github.com/llvm/llvm-project/commit/216575e581023f6bac9bb81fbfb5b626faf93f23
DIFF: https://github.com/llvm/llvm-project/commit/216575e581023f6bac9bb81fbfb5b626faf93f23.diff

LOG: Revert "Revert "[ProfileData] Read and symbolize raw memprof profiles.""

This reverts commit dbf47d227d080e4eb7239b589660f51d7b08afa9.

Reapply https://reviews.llvm.org/D116784 now that
https://reviews.llvm.org/D118413 has landed with a couple of fixes:
* fix raw profile reader unaligned access identified by ubsan
* fix windows build by using MOCK_CONST_METHOD3 instead of MOCK_METHOD.

Added: 
    llvm/include/llvm/ProfileData/MemProf.h
    llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe
    llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe
    llvm/unittests/ProfileData/MemProfTest.cpp

Modified: 
    llvm/include/llvm/DebugInfo/DIContext.h
    llvm/include/llvm/ProfileData/RawMemProfReader.h
    llvm/lib/ProfileData/CMakeLists.txt
    llvm/lib/ProfileData/RawMemProfReader.cpp
    llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw
    llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw
    llvm/test/tools/llvm-profdata/memprof-basic.test
    llvm/test/tools/llvm-profdata/memprof-multi.test
    llvm/tools/llvm-profdata/llvm-profdata.cpp
    llvm/unittests/ProfileData/CMakeLists.txt

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h
index d029556c9d89c..5bddc8b52024a 100644
--- a/llvm/include/llvm/DebugInfo/DIContext.h
+++ b/llvm/include/llvm/DebugInfo/DIContext.h
@@ -151,6 +151,10 @@ struct DILineInfoSpecifier {
   DILineInfoSpecifier(FileLineInfoKind FLIKind = FileLineInfoKind::RawValue,
                       FunctionNameKind FNKind = FunctionNameKind::None)
       : FLIKind(FLIKind), FNKind(FNKind) {}
+
+  inline bool operator==(const DILineInfoSpecifier &RHS) const {
+    return FLIKind == RHS.FLIKind && FNKind == RHS.FNKind;
+  }
 };
 
 /// This is just a helper to programmatically construct DIDumpType.

diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
new file mode 100644
index 0000000000000..c21903c940061
--- /dev/null
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -0,0 +1,95 @@
+#ifndef LLVM_PROFILEDATA_MEMPROF_H_
+#define LLVM_PROFILEDATA_MEMPROF_H_
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "llvm/ProfileData/MemProfData.inc"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace memprof {
+
+struct MemProfRecord {
+  struct Frame {
+    std::string Function;
+    uint32_t LineOffset;
+    uint32_t Column;
+    bool IsInlineFrame;
+
+    Frame(std::string Str, uint32_t Off, uint32_t Col, bool Inline)
+        : Function(std::move(Str)), LineOffset(Off), Column(Col),
+          IsInlineFrame(Inline) {}
+  };
+
+  std::vector<Frame> CallStack;
+  // TODO: Replace this with the entry format described in the RFC so
+  // that the InstrProfRecord reader and writer do not have to be concerned
+  // about backwards compat.
+  MemInfoBlock Info;
+
+  void clear() {
+    CallStack.clear();
+    Info = MemInfoBlock();
+  }
+
+  // Prints out the contents of the memprof record in YAML.
+  void print(llvm::raw_ostream &OS) const {
+    OS << "    Callstack:\n";
+    // TODO: Print out the frame on one line with to make it easier for deep
+    // callstacks once we have a test to check valid YAML is generated.
+    for (const auto &Frame : CallStack) {
+      OS << "    -\n"
+         << "      Function: " << Frame.Function << "\n"
+         << "      LineOffset: " << Frame.LineOffset << "\n"
+         << "      Column: " << Frame.Column << "\n"
+         << "      Inline: " << Frame.IsInlineFrame << "\n";
+    }
+
+    OS << "    MemInfoBlock:\n";
+
+    // TODO: Replace this once the format is updated to be version agnostic.
+    OS << "      "
+       << "AllocCount: " << Info.alloc_count << "\n";
+    OS << "      "
+       << "TotalAccessCount: " << Info.total_access_count << "\n";
+    OS << "      "
+       << "MinAccessCount: " << Info.min_access_count << "\n";
+    OS << "      "
+       << "MaxAccessCount: " << Info.max_access_count << "\n";
+    OS << "      "
+       << "TotalSize: " << Info.total_size << "\n";
+    OS << "      "
+       << "MinSize: " << Info.min_size << "\n";
+    OS << "      "
+       << "MaxSize: " << Info.max_size << "\n";
+    OS << "      "
+       << "AllocTimestamp: " << Info.alloc_timestamp << "\n";
+    OS << "      "
+       << "DeallocTimestamp: " << Info.dealloc_timestamp << "\n";
+    OS << "      "
+       << "TotalLifetime: " << Info.total_lifetime << "\n";
+    OS << "      "
+       << "MinLifetime: " << Info.min_lifetime << "\n";
+    OS << "      "
+       << "MaxLifetime: " << Info.max_lifetime << "\n";
+    OS << "      "
+       << "AllocCpuId: " << Info.alloc_cpu_id << "\n";
+    OS << "      "
+       << "DeallocCpuId: " << Info.dealloc_cpu_id << "\n";
+    OS << "      "
+       << "NumMigratedCpu: " << Info.num_migrated_cpu << "\n";
+    OS << "      "
+       << "NumLifetimeOverlaps: " << Info.num_lifetime_overlaps << "\n";
+    OS << "      "
+       << "NumSameAllocCpu: " << Info.num_same_alloc_cpu << "\n";
+    OS << "      "
+       << "NumSameDeallocCpu: " << Info.num_same_dealloc_cpu << "\n";
+  }
+};
+
+} // namespace memprof
+} // namespace llvm
+
+#endif // LLVM_PROFILEDATA_MEMPROF_H_

diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h
index 5041f54c46ad2..55ba31d2a6492 100644
--- a/llvm/include/llvm/ProfileData/RawMemProfReader.h
+++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h
@@ -12,33 +12,95 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/ProfileData/MemProf.h"
+#include "llvm/ProfileData/MemProfData.inc"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBuffer.h"
 
+#include <cstddef>
+
 namespace llvm {
 namespace memprof {
 
+// Map from id (recorded from sanitizer stack depot) to virtual addresses for
+// each program counter address in the callstack.
+using CallStackMap = llvm::DenseMap<uint64_t, llvm::SmallVector<uint64_t, 32>>;
+
 class RawMemProfReader {
 public:
   RawMemProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
       : DataBuffer(std::move(DataBuffer)) {}
+  RawMemProfReader(const RawMemProfReader &) = delete;
+  RawMemProfReader &operator=(const RawMemProfReader &) = delete;
+
   // Prints the contents of the profile in YAML format.
   void printYAML(raw_ostream &OS);
 
   // Return true if the \p DataBuffer starts with magic bytes indicating it is
   // a raw binary memprof profile.
   static bool hasFormat(const MemoryBuffer &DataBuffer);
+  // Return true if the file at \p Path starts with magic bytes indicating it is
+  // a raw binary memprof profile.
+  static bool hasFormat(const StringRef Path);
 
   // Create a RawMemProfReader after sanity checking the contents of the file at
-  // \p Path.
-  static Expected<std::unique_ptr<RawMemProfReader>> create(const Twine &Path);
+  // \p Path. The binary from which the profile has been collected is specified
+  // via a path in \p ProfiledBinary.
+  static Expected<std::unique_ptr<RawMemProfReader>>
+  create(const Twine &Path, const StringRef ProfiledBinary);
+
+  Error readNextRecord(MemProfRecord &Record);
+
+  using Iterator = InstrProfIterator<MemProfRecord, RawMemProfReader>;
+  Iterator end() { return Iterator(); }
+  Iterator begin() {
+    Iter = ProfileData.begin();
+    return Iterator(this);
+  }
+
+  // Constructor for unittests only.
+  RawMemProfReader(std::unique_ptr<llvm::symbolize::SymbolizableModule> Sym,
+                   llvm::SmallVectorImpl<SegmentEntry> &Seg,
+                   llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
+                   CallStackMap &SM)
+      : Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()),
+        ProfileData(Prof), StackMap(SM) {}
 
 private:
+  RawMemProfReader(std::unique_ptr<MemoryBuffer> DataBuffer,
+                   object::OwningBinary<object::Binary> &&Bin)
+      : DataBuffer(std::move(DataBuffer)), Binary(std::move(Bin)) {}
+  Error initialize();
+  Error readRawProfile();
+
+  object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
+  Error fillRecord(const uint64_t Id, const MemInfoBlock &MIB,
+                   MemProfRecord &Record);
   // Prints aggregate counts for each raw profile parsed from the DataBuffer in
   // YAML format.
   void printSummaries(raw_ostream &OS) const;
 
   std::unique_ptr<MemoryBuffer> DataBuffer;
+  object::OwningBinary<object::Binary> Binary;
+  std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer;
+
+  // The contents of the raw profile.
+  llvm::SmallVector<SegmentEntry, 16> SegmentInfo;
+  // A map from callstack id (same as key in CallStackMap below) to the heap
+  // information recorded for that allocation context.
+  llvm::MapVector<uint64_t, MemInfoBlock> ProfileData;
+  CallStackMap StackMap;
+
+  // Iterator to read from the ProfileData MapVector.
+  llvm::MapVector<uint64_t, MemInfoBlock>::iterator Iter = ProfileData.end();
 };
 
 } // namespace memprof

diff --git a/llvm/lib/ProfileData/CMakeLists.txt b/llvm/lib/ProfileData/CMakeLists.txt
index 1237bf77983fc..2749119f72d90 100644
--- a/llvm/lib/ProfileData/CMakeLists.txt
+++ b/llvm/lib/ProfileData/CMakeLists.txt
@@ -18,9 +18,10 @@ add_llvm_component_library(LLVMProfileData
 
   LINK_COMPONENTS
   Core
+  Object
   Support
   Demangle
-  Object
+  Symbolize
   DebugInfoDWARF
   )
 

diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index f6c59cdf6663d..d13d51d68025b 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -13,9 +13,20 @@
 #include <cstdint>
 #include <type_traits>
 
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ObjectFile.h"
 #include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/MemProf.h"
 #include "llvm/ProfileData/MemProfData.inc"
 #include "llvm/ProfileData/RawMemProfReader.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/MD5.h"
 
 namespace llvm {
 namespace memprof {
@@ -48,31 +59,22 @@ Summary computeSummary(const char *Start) {
   };
 }
 
-} // namespace
-
-Expected<std::unique_ptr<RawMemProfReader>>
-RawMemProfReader::create(const Twine &Path) {
-  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
-  if (std::error_code EC = BufferOr.getError())
-    return errorCodeToError(EC);
-
-  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
+Error checkBuffer(const MemoryBuffer &Buffer) {
+  if (!RawMemProfReader::hasFormat(Buffer))
+    return make_error<InstrProfError>(instrprof_error::bad_magic);
 
-  if (Buffer->getBufferSize() == 0)
+  if (Buffer.getBufferSize() == 0)
     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
 
-  if (!RawMemProfReader::hasFormat(*Buffer))
-    return make_error<InstrProfError>(instrprof_error::bad_magic);
-
-  if (Buffer->getBufferSize() < sizeof(Header)) {
+  if (Buffer.getBufferSize() < sizeof(Header)) {
     return make_error<InstrProfError>(instrprof_error::truncated);
   }
 
   // The size of the buffer can be > header total size since we allow repeated
   // serialization of memprof profiles to the same file.
   uint64_t TotalSize = 0;
-  const char *Next = Buffer->getBufferStart();
-  while (Next < Buffer->getBufferEnd()) {
+  const char *Next = Buffer.getBufferStart();
+  while (Next < Buffer.getBufferEnd()) {
     auto *H = reinterpret_cast<const Header *>(Next);
     if (H->Version != MEMPROF_RAW_VERSION) {
       return make_error<InstrProfError>(instrprof_error::unsupported_version);
@@ -82,11 +84,126 @@ RawMemProfReader::create(const Twine &Path) {
     Next += H->TotalSize;
   }
 
-  if (Buffer->getBufferSize() != TotalSize) {
+  if (Buffer.getBufferSize() != TotalSize) {
     return make_error<InstrProfError>(instrprof_error::malformed);
   }
+  return Error::success();
+}
+
+llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
+  using namespace support;
+
+  const uint64_t NumItemsToRead =
+      endian::readNext<uint64_t, little, unaligned>(Ptr);
+  llvm::SmallVector<SegmentEntry> Items;
+  for (uint64_t I = 0; I < NumItemsToRead; I++) {
+    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
+        Ptr + I * sizeof(SegmentEntry)));
+  }
+  return Items;
+}
+
+llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
+readMemInfoBlocks(const char *Ptr) {
+  using namespace support;
+
+  const uint64_t NumItemsToRead =
+      endian::readNext<uint64_t, little, unaligned>(Ptr);
+  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
+  for (uint64_t I = 0; I < NumItemsToRead; I++) {
+    const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
+    const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
+    Items.push_back({Id, MIB});
+    // Only increment by size of MIB since readNext implicitly increments.
+    Ptr += sizeof(MemInfoBlock);
+  }
+  return Items;
+}
+
+CallStackMap readStackInfo(const char *Ptr) {
+  using namespace support;
+
+  const uint64_t NumItemsToRead =
+      endian::readNext<uint64_t, little, unaligned>(Ptr);
+  CallStackMap Items;
+
+  for (uint64_t I = 0; I < NumItemsToRead; I++) {
+    const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
+    const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);
+
+    SmallVector<uint64_t, 32> CallStack;
+    for (uint64_t J = 0; J < NumPCs; J++) {
+      CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
+    }
+
+    Items[StackId] = CallStack;
+  }
+  return Items;
+}
+
+// Merges the contents of stack information in \p From to \p To. Returns true if
+// any stack ids observed previously map to a different set of program counter
+// addresses.
+bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
+  for (const auto &IdStack : From) {
+    auto I = To.find(IdStack.first);
+    if (I == To.end()) {
+      To[IdStack.first] = IdStack.second;
+    } else {
+      // Check that the PCs are the same (in order).
+      if (IdStack.second != I->second)
+        return true;
+    }
+  }
+  return false;
+}
+
+StringRef trimSuffix(const StringRef Name) {
+  const auto Pos = Name.find(".llvm.");
+  return Name.take_front(Pos);
+}
+
+Error report(Error E, const StringRef Context) {
+  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
+                    std::move(E));
+}
+} // namespace
+
+Expected<std::unique_ptr<RawMemProfReader>>
+RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) {
+  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
+  if (std::error_code EC = BufferOr.getError())
+    return report(errorCodeToError(EC), Path.getSingleStringRef());
+
+  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
+  if (Error E = checkBuffer(*Buffer))
+    return report(std::move(E), Path.getSingleStringRef());
+
+  if (ProfiledBinary.empty())
+    return report(
+        errorCodeToError(make_error_code(std::errc::invalid_argument)),
+        "Path to profiled binary is empty!");
+
+  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
+  if (!BinaryOr) {
+    return report(BinaryOr.takeError(), ProfiledBinary);
+  }
+
+  std::unique_ptr<RawMemProfReader> Reader(
+      new RawMemProfReader(std::move(Buffer), std::move(BinaryOr.get())));
+  if (Error E = Reader->initialize()) {
+    return std::move(E);
+  }
+  return std::move(Reader);
+}
+
+bool RawMemProfReader::hasFormat(const StringRef Path) {
+  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
+  if (!BufferOr)
+    return false;
 
-  return std::make_unique<RawMemProfReader>(std::move(Buffer));
+  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
+  return hasFormat(*Buffer);
 }
 
 bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
@@ -101,6 +218,12 @@ bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
 void RawMemProfReader::printYAML(raw_ostream &OS) {
   OS << "MemprofProfile:\n";
   printSummaries(OS);
+  // Print out the merged contents of the profiles.
+  OS << "  Records:\n";
+  for (const auto &Record : *this) {
+    OS << "  -\n";
+    Record.print(OS);
+  }
 }
 
 void RawMemProfReader::printSummaries(raw_ostream &OS) const {
@@ -122,5 +245,142 @@ void RawMemProfReader::printSummaries(raw_ostream &OS) const {
   }
 }
 
+Error RawMemProfReader::initialize() {
+  const StringRef FileName = Binary.getBinary()->getFileName();
+
+  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
+  if (!ElfObject) {
+    return report(make_error<StringError>(Twine("Not an ELF file: "),
+                                          inconvertibleErrorCode()),
+                  FileName);
+  }
+
+  auto Triple = ElfObject->makeTriple();
+  if (!Triple.isX86())
+    return report(make_error<StringError>(Twine("Unsupported target: ") +
+                                              Triple.getArchName(),
+                                          inconvertibleErrorCode()),
+                  FileName);
+
+  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
+  std::unique_ptr<DIContext> Context = DWARFContext::create(
+      *Object, DWARFContext::ProcessDebugRelocations::Process);
+
+  auto SOFOr = symbolize::SymbolizableObjectFile::create(
+      Object, std::move(Context), /*UntagAddresses=*/false);
+  if (!SOFOr)
+    return report(SOFOr.takeError(), FileName);
+  Symbolizer = std::move(SOFOr.get());
+
+  return readRawProfile();
+}
+
+Error RawMemProfReader::readRawProfile() {
+  const char *Next = DataBuffer->getBufferStart();
+
+  while (Next < DataBuffer->getBufferEnd()) {
+    auto *Header = reinterpret_cast<const memprof::Header *>(Next);
+
+    // Read in the segment information, check whether its the same across all
+    // profiles in this binary file.
+    const llvm::SmallVector<SegmentEntry> Entries =
+        readSegmentEntries(Next + Header->SegmentOffset);
+    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
+      // We do not expect segment information to change when deserializing from
+      // the same binary profile file. This can happen if dynamic libraries are
+      // loaded/unloaded between profile dumping.
+      return make_error<InstrProfError>(
+          instrprof_error::malformed,
+          "memprof raw profile has different segment information");
+    }
+    SegmentInfo.assign(Entries.begin(), Entries.end());
+
+    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
+    // raw profiles in the same binary file are from the same process so the
+    // stackdepot ids are the same.
+    for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
+      if (ProfileData.count(Value.first)) {
+        ProfileData[Value.first].Merge(Value.second);
+      } else {
+        ProfileData[Value.first] = Value.second;
+      }
+    }
+
+    // Read in the callstack for each ids. For multiple raw profiles in the same
+    // file, we expect that the callstack is the same for a unique id.
+    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
+    if (StackMap.empty()) {
+      StackMap = CSM;
+    } else {
+      if (mergeStackMap(CSM, StackMap))
+        return make_error<InstrProfError>(
+            instrprof_error::malformed,
+            "memprof raw profile got different call stack for same id");
+    }
+
+    Next += Header->TotalSize;
+  }
+
+  return Error::success();
+}
+
+object::SectionedAddress
+RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
+  SegmentEntry *ContainingSegment = nullptr;
+  for (auto &SE : SegmentInfo) {
+    if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
+      ContainingSegment = &SE;
+    }
+  }
+
+  // Ensure that the virtual address is valid.
+  assert(ContainingSegment && "Could not find a segment entry");
+
+  // TODO: Compute the file offset based on the maps and program headers. For
+  // now this only works for non PIE binaries.
+  return object::SectionedAddress{VirtualAddress};
+}
+
+Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB,
+                                   MemProfRecord &Record) {
+  auto &CallStack = StackMap[Id];
+  DILineInfoSpecifier Specifier(
+      DILineInfoSpecifier::FileLineInfoKind::RawValue,
+      DILineInfoSpecifier::FunctionNameKind::LinkageName);
+  for (const uint64_t Address : CallStack) {
+    Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
+        getModuleOffset(Address), Specifier, /*UseSymbolTable=*/false);
+
+    if (!DIOr)
+      return DIOr.takeError();
+    DIInliningInfo DI = DIOr.get();
+
+    for (size_t I = 0; I < DI.getNumberOfFrames(); I++) {
+      const auto &Frame = DI.getFrame(I);
+      Record.CallStack.emplace_back(
+          std::to_string(llvm::MD5Hash(trimSuffix(Frame.FunctionName))),
+          Frame.Line - Frame.StartLine, Frame.Column,
+          // Only the first entry is not an inlined location.
+          I != 0);
+    }
+  }
+  Record.Info = MIB;
+  return Error::success();
+}
+
+Error RawMemProfReader::readNextRecord(MemProfRecord &Record) {
+  if (ProfileData.empty())
+    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
+
+  if (Iter == ProfileData.end())
+    return make_error<InstrProfError>(instrprof_error::eof);
+
+  Record.clear();
+  if (Error E = fillRecord(Iter->first, Iter->second, Record)) {
+    return E;
+  }
+  Iter++;
+  return Error::success();
+}
 } // namespace memprof
 } // namespace llvm

diff --git a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe
new file mode 100755
index 0000000000000..7f89f135c9d5c
Binary files /dev/null and b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofexe differ

diff --git a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw
index 42bd6e72140ca..af27be1e0a10f 100644
Binary files a/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/basic.memprofraw differ

diff --git a/llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe
new file mode 100755
index 0000000000000..cd14838f2e3b3
Binary files /dev/null and b/llvm/test/tools/llvm-profdata/Inputs/multi.memprofexe differ

diff --git a/llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw b/llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw
index fd8f4129e6094..bf843a9f6ad55 100644
Binary files a/llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw and b/llvm/test/tools/llvm-profdata/Inputs/multi.memprofraw differ

diff --git a/llvm/test/tools/llvm-profdata/memprof-basic.test b/llvm/test/tools/llvm-profdata/memprof-basic.test
index 8e4adaae5577e..23c35c28ae96d 100644
--- a/llvm/test/tools/llvm-profdata/memprof-basic.test
+++ b/llvm/test/tools/llvm-profdata/memprof-basic.test
@@ -24,12 +24,14 @@ the shared libraries linked in which could change the number of segments
 recorded.
 
 ```
-clang -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer -fno-optimize-sibling-calls -gline-tables-only -m64 -Wl,-build-id source.c -o rawprofile.out
+clang -fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling \
+      -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer \
+      -fno-optimize-sibling-calls -m64 -Wl,-build-id source.c -o basic.memprofexe 
 
 env MEMPROF_OPTIONS=log_path=stdout ./rawprofile.out > basic.memprofraw
 ```
 
-RUN: llvm-profdata show --memory %p/Inputs/basic.memprofraw -o - | FileCheck %s
+RUN: llvm-profdata show --memory %p/Inputs/basic.memprofraw --profiled-binary %p/Inputs/basic.memprofexe -o - | FileCheck %s
 
 We expect 3 MIB entries, 1 each for the malloc calls in the program and one
 additional entry from a realloc in glibc/libio/vasprintf.c.
@@ -42,3 +44,107 @@ CHECK-NEXT:     TotalSizeBytes: 1016
 CHECK-NEXT:     NumSegments: 9
 CHECK-NEXT:     NumMibInfo: 3
 CHECK-NEXT:     NumStackOffsets: 3
+CHECK-NEXT:   Records:
+CHECK-NEXT:   -
+CHECK-NEXT:     Callstack:
+CHECK-NEXT:     -
+CHECK-NEXT:       Function: {{[0-9]+}}
+CHECK-NEXT:       LineOffset: 73
+CHECK-NEXT:       Column: 3
+CHECK-NEXT:       Inline: 0
+CHECK-NEXT:     -
+CHECK-NEXT:       Function: {{[0-9]+}}
+CHECK-NEXT:       LineOffset: 0
+CHECK-NEXT:       Column: 0
+CHECK-NEXT:       Inline: 0
+CHECK-NEXT:     MemInfoBlock:
+CHECK-NEXT:       AllocCount: 1
+CHECK-NEXT:       TotalAccessCount: 0
+CHECK-NEXT:       MinAccessCount: 0
+CHECK-NEXT:       MaxAccessCount: 0
+CHECK-NEXT:       TotalSize: 53
+CHECK-NEXT:       MinSize: 53
+CHECK-NEXT:       MaxSize: 53
+CHECK-NEXT:       AllocTimestamp: 0
+CHECK-NEXT:       DeallocTimestamp: 987
+CHECK-NEXT:       TotalLifetime: 987
+CHECK-NEXT:       MinLifetime: 987
+CHECK-NEXT:       MaxLifetime: 987
+CHECK-NEXT:       AllocCpuId: 4294967295
+CHECK-NEXT:       DeallocCpuId: 56
+CHECK-NEXT:       NumMigratedCpu: 1
+CHECK-NEXT:       NumLifetimeOverlaps: 0
+CHECK-NEXT:       NumSameAllocCpu: 0
+CHECK-NEXT:       NumSameDeallocCpu: 0
+CHECK-NEXT:   -
+CHECK-NEXT:     Callstack:
+CHECK-NEXT:     -
+CHECK-NEXT:       Function: {{[0-9]+}}
+CHECK-NEXT:       LineOffset: 57
+CHECK-NEXT:       Column: 3
+CHECK-NEXT:       Inline: 0
+CHECK-NEXT:     -
+CHECK-NEXT:       Function: {{[0-9]+}}
+CHECK-NEXT:       LineOffset: 1
+CHECK-NEXT:       Column: 21
+CHECK-NEXT:       Inline: 0
+CHECK-NEXT:     -
+CHECK-NEXT:       Function: {{[0-9]+}}
+CHECK-NEXT:       LineOffset: 0
+CHECK-NEXT:       Column: 0
+CHECK-NEXT:       Inline: 0
+CHECK-NEXT:     MemInfoBlock:
+CHECK-NEXT:       AllocCount: 1
+CHECK-NEXT:       TotalAccessCount: 2
+CHECK-NEXT:       MinAccessCount: 2
+CHECK-NEXT:       MaxAccessCount: 2
+CHECK-NEXT:       TotalSize: 10
+CHECK-NEXT:       MinSize: 10
+CHECK-NEXT:       MaxSize: 10
+CHECK-NEXT:       AllocTimestamp: 986
+CHECK-NEXT:       DeallocTimestamp: 986
+CHECK-NEXT:       TotalLifetime: 0
+CHECK-NEXT:       MinLifetime: 0
+CHECK-NEXT:       MaxLifetime: 0
+CHECK-NEXT:       AllocCpuId: 56
+CHECK-NEXT:       DeallocCpuId: 56
+CHECK-NEXT:       NumMigratedCpu: 0
+CHECK-NEXT:       NumLifetimeOverlaps: 0
+CHECK-NEXT:       NumSameAllocCpu: 0
+CHECK-NEXT:       NumSameDeallocCpu: 0
+CHECK-NEXT:   -
+CHECK-NEXT:     Callstack:
+CHECK-NEXT:     -
+CHECK-NEXT:       Function: {{[0-9]+}}
+CHECK-NEXT:       LineOffset: 57
+CHECK-NEXT:       Column: 3
+CHECK-NEXT:       Inline: 0
+CHECK-NEXT:     -
+CHECK-NEXT:       Function: {{[0-9]+}}
+CHECK-NEXT:       LineOffset: 5
+CHECK-NEXT:       Column: 15
+CHECK-NEXT:       Inline: 0
+CHECK-NEXT:     -
+CHECK-NEXT:       Function: {{[0-9]+}}
+CHECK-NEXT:       LineOffset: 0
+CHECK-NEXT:       Column: 0
+CHECK-NEXT:       Inline: 0
+CHECK-NEXT:     MemInfoBlock:
+CHECK-NEXT:       AllocCount: 1
+CHECK-NEXT:       TotalAccessCount: 2
+CHECK-NEXT:       MinAccessCount: 2
+CHECK-NEXT:       MaxAccessCount: 2
+CHECK-NEXT:       TotalSize: 10
+CHECK-NEXT:       MinSize: 10
+CHECK-NEXT:       MaxSize: 10
+CHECK-NEXT:       AllocTimestamp: 987
+CHECK-NEXT:       DeallocTimestamp: 987
+CHECK-NEXT:       TotalLifetime: 0
+CHECK-NEXT:       MinLifetime: 0
+CHECK-NEXT:       MaxLifetime: 0
+CHECK-NEXT:       AllocCpuId: 56
+CHECK-NEXT:       DeallocCpuId: 56
+CHECK-NEXT:       NumMigratedCpu: 0
+CHECK-NEXT:       NumLifetimeOverlaps: 0
+CHECK-NEXT:       NumSameAllocCpu: 0
+CHECK-NEXT:       NumSameDeallocCpu: 0

diff --git a/llvm/test/tools/llvm-profdata/memprof-multi.test b/llvm/test/tools/llvm-profdata/memprof-multi.test
index 99c32a9bde5f2..3643b54039038 100644
--- a/llvm/test/tools/llvm-profdata/memprof-multi.test
+++ b/llvm/test/tools/llvm-profdata/memprof-multi.test
@@ -26,12 +26,14 @@ the shared libraries linked in which could change the number of segments
 recorded.
 
 ```
-clang -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer -fno-optimize-sibling-calls -gline-tables-only -m64 -Wl,-build-id source.c -o rawprofile.out
+clang -fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling \
+      -fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer \
+      -fno-optimize-sibling-calls -m64 -Wl,-build-id source.c -o multi.memprofexe
 
 env MEMPROF_OPTIONS=log_path=stdout ./rawprofile.out > multi.memprofraw
 ```
 
-RUN: llvm-profdata show --memory %p/Inputs/multi.memprofraw -o - | FileCheck %s
+RUN: llvm-profdata show --memory %p/Inputs/multi.memprofraw --profiled-binary %p/Inputs/multi.memprofexe -o - | FileCheck %s
 
 We expect 2 MIB entries, 1 each for the malloc calls in the program. Unlike the
 memprof-basic.test we do not see any allocation from glibc.

diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 9a345b48a418f..71f07f53fe55a 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ProfileData/InstrProfCorrelator.h"
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ProfileData/InstrProfWriter.h"
+#include "llvm/ProfileData/MemProf.h"
 #include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/ProfileData/RawMemProfReader.h"
 #include "llvm/ProfileData/SampleProfReader.h"
@@ -2480,10 +2481,16 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
   return 0;
 }
 
-static int showMemProfProfile(const std::string &Filename, raw_fd_ostream &OS) {
-  auto ReaderOr = llvm::memprof::RawMemProfReader::create(Filename);
+static int showMemProfProfile(const std::string &Filename,
+                              const std::string &ProfiledBinary,
+                              raw_fd_ostream &OS) {
+  auto ReaderOr =
+      llvm::memprof::RawMemProfReader::create(Filename, ProfiledBinary);
   if (Error E = ReaderOr.takeError())
-    exitWithError(std::move(E), Filename);
+    // Since the error can be related to the profile or the binary we do not
+    // pass whence. Instead additional context is provided where necessary in
+    // the error message.
+    exitWithError(std::move(E), /*Whence*/ "");
 
   std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
       ReaderOr.get().release());
@@ -2588,6 +2595,9 @@ static int show_main(int argc, const char *argv[]) {
   cl::opt<bool> ShowCovered(
       "covered", cl::init(false),
       cl::desc("Show only the functions that have been executed."));
+  cl::opt<std::string> ProfiledBinary(
+      "profiled-binary", cl::init(""),
+      cl::desc("Path to binary from which the profile was collected."));
 
   cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
 
@@ -2625,7 +2635,7 @@ static int show_main(int argc, const char *argv[]) {
                              ShowAllFunctions, ShowDetailedSummary,
                              ShowFunction, ShowProfileSymbolList,
                              ShowSectionInfoOnly, ShowHotFuncList, OS);
-  return showMemProfProfile(Filename, OS);
+  return showMemProfProfile(Filename, ProfiledBinary, OS);
 }
 
 int main(int argc, const char *argv[]) {

diff  --git a/llvm/unittests/ProfileData/CMakeLists.txt b/llvm/unittests/ProfileData/CMakeLists.txt
index 00a0079e675a8..f9c0dd3044d05 100644
--- a/llvm/unittests/ProfileData/CMakeLists.txt
+++ b/llvm/unittests/ProfileData/CMakeLists.txt
@@ -10,6 +10,7 @@ add_llvm_unittest(ProfileDataTests
   InstrProfDataTest.cpp
   InstrProfTest.cpp
   SampleProfTest.cpp
+  MemProfTest.cpp
   )
 
 target_link_libraries(ProfileDataTests PRIVATE LLVMTestingSupport)

diff  --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
new file mode 100644
index 0000000000000..c63d2ecba82b1
--- /dev/null
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -0,0 +1,150 @@
+#include "llvm/ProfileData/MemProf.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/MemProfData.inc"
+#include "llvm/ProfileData/RawMemProfReader.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MD5.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include <initializer_list>
+
+namespace {
+
+using ::llvm::DIGlobal;
+using ::llvm::DIInliningInfo;
+using ::llvm::DILineInfo;
+using ::llvm::DILineInfoSpecifier;
+using ::llvm::DILocal;
+using ::llvm::memprof::CallStackMap;
+using ::llvm::memprof::MemInfoBlock;
+using ::llvm::memprof::MemProfRecord;
+using ::llvm::memprof::RawMemProfReader;
+using ::llvm::memprof::SegmentEntry;
+using ::llvm::object::SectionedAddress;
+using ::llvm::symbolize::SymbolizableModule;
+using ::testing::Return;
+
+class MockSymbolizer : public SymbolizableModule {
+public:
+  // Interface methods the tests never expect to be reached.
+  virtual bool isWin32Module() const { llvm_unreachable("unused"); }
+  virtual uint64_t getModulePreferredBase() const {
+    llvm_unreachable("unused");
+  }
+  virtual DIGlobal symbolizeData(SectionedAddress) const {
+    llvm_unreachable("unused");
+  }
+  virtual std::vector<DILocal> symbolizeFrame(SectionedAddress) const {
+    llvm_unreachable("unused");
+  }
+  virtual DILineInfo symbolizeCode(SectionedAddress, DILineInfoSpecifier,
+                                   bool) const {
+    llvm_unreachable("unused");
+  }
+  // The one method mocked: the memprof reader is expected to call it.
+  MOCK_CONST_METHOD3(symbolizeInlinedCode,
+                     DIInliningInfo(SectionedAddress, DILineInfoSpecifier,
+                                    bool));
+};
+
+struct MockInfo {
+  std::string FunctionName; // Copied to DILineInfo::FunctionName.
+  uint32_t Line;            // Copied to DILineInfo::Line.
+  uint32_t StartLine;       // Copied to DILineInfo::StartLine.
+  uint32_t Column;          // Copied to DILineInfo::Column.
+};
+DIInliningInfo makeInliningInfo(std::initializer_list<MockInfo> MockFrames) {
+  DIInliningInfo Info;
+  for (const MockInfo &Mock : MockFrames) { // One frame per entry, in order.
+    DILineInfo LineInfo;
+    LineInfo.Column = Mock.Column;
+    LineInfo.StartLine = Mock.StartLine;
+    LineInfo.Line = Mock.Line;
+    LineInfo.FunctionName = Mock.FunctionName;
+    Info.addFrame(LineInfo);
+  }
+  return Info;
+}
+
+// A single segment entry mimicking a non position independent executable.
+llvm::SmallVector<SegmentEntry, 4> makeSegments() {
+  llvm::SmallVector<SegmentEntry, 4> Segments;
+  Segments.emplace_back(0x0, 0x40000, 0x0);
+  return Segments;
+}
+
+// Line-info specifier the symbolizeInlinedCode expectations match against.
+DILineInfoSpecifier specifier() {
+  return {DILineInfoSpecifier::FileLineInfoKind::RawValue,
+          DILineInfoSpecifier::FunctionNameKind::LinkageName};
+}
+
+// Matches a frame whose Function is the stringified MD5 hash of `Function` and
+// whose LineOffset, Column and IsInlineFrame equal the remaining parameters.
+MATCHER_P4(FrameContains, Function, LineOffset, Column, Inline, "") {
+  if (arg.Function != std::to_string(llvm::MD5Hash(Function))) {
+    *result_listener << "Hash mismatch";
+    return false;
+  }
+  const bool RestMatches = arg.LineOffset == LineOffset &&
+                           arg.Column == Column && arg.IsInlineFrame == Inline;
+  if (!RestMatches)
+    *result_listener << "LineOffset, Column or Inline mismatch";
+  return RestMatches;
+}
+
+// Records from the reader carry the profiled counts and symbolized frames.
+TEST(MemProf, FillsValue) {
+  auto Symbolizer = std::make_unique<MockSymbolizer>();
+
+  EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x2000},
+                                                specifier(), false))
+      .Times(2)
+      .WillRepeatedly(Return(makeInliningInfo({
+          {"foo", 10, 5, 30},
+          {"bar", 201, 150, 20},
+      })));
+
+  EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x6000},
+                                                specifier(), false))
+      .Times(1)
+      .WillRepeatedly(Return(makeInliningInfo({
+          {"baz", 10, 5, 30},
+          {"qux.llvm.12345", 75, 70, 10},
+      })));
+
+  CallStackMap CSM;
+  CSM[0x1] = {0x2000};
+  CSM[0x2] = {0x6000, 0x2000};
+
+  llvm::MapVector<uint64_t, MemInfoBlock> Prof;
+  Prof[0x1].alloc_count = 1;
+  Prof[0x2].alloc_count = 2;
+
+  auto Seg = makeSegments();
+  RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM);
+
+  std::vector<MemProfRecord> Records;
+  for (const MemProfRecord &R : Reader)
+    Records.push_back(R);
+  // Fatal on mismatch: the checks below index into Records.
+  ASSERT_EQ(Records.size(), 2U);
+
+  EXPECT_EQ(Records[0].Info.alloc_count, 1U);
+  EXPECT_EQ(Records[1].Info.alloc_count, 2U);
+  EXPECT_THAT(Records[0].CallStack[0], FrameContains("foo", 5U, 30U, false));
+  EXPECT_THAT(Records[0].CallStack[1], FrameContains("bar", 51U, 20U, true));
+
+  EXPECT_THAT(Records[1].CallStack[0], FrameContains("baz", 5U, 30U, false));
+  EXPECT_THAT(Records[1].CallStack[1], FrameContains("qux", 5U, 10U, true));
+  EXPECT_THAT(Records[1].CallStack[2], FrameContains("foo", 5U, 30U, false));
+  EXPECT_THAT(Records[1].CallStack[3], FrameContains("bar", 51U, 20U, true));
+}
+
+} // namespace


        


More information about the llvm-commits mailing list