[llvm] 27a4f25 - Reland "[memprof] Store callsite metadata with memprof records."
Snehasish Kumar via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 22 14:45:28 PDT 2022
Author: Snehasish Kumar
Date: 2022-03-22T14:40:02-07:00
New Revision: 27a4f2545f335d5792a835aed1ce518a9c807e3b
URL: https://github.com/llvm/llvm-project/commit/27a4f2545f335d5792a835aed1ce518a9c807e3b
DIFF: https://github.com/llvm/llvm-project/commit/27a4f2545f335d5792a835aed1ce518a9c807e3b.diff
LOG: Reland "[memprof] Store callsite metadata with memprof records."
This reverts commit f4b794427e8037a4e952cacdfe7201e961f31a6f.
Reland with underlying msan issue fixed in D122260.
Added:
llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe
Modified:
llvm/include/llvm/ProfileData/InstrProfWriter.h
llvm/include/llvm/ProfileData/MemProf.h
llvm/include/llvm/ProfileData/RawMemProfReader.h
llvm/lib/ProfileData/InstrProfWriter.cpp
llvm/lib/ProfileData/MemProf.cpp
llvm/lib/ProfileData/RawMemProfReader.cpp
llvm/test/tools/llvm-profdata/memprof-basic.test
llvm/test/tools/llvm-profdata/memprof-inline.test
llvm/tools/llvm-profdata/llvm-profdata.cpp
llvm/unittests/ProfileData/InstrProfTest.cpp
llvm/unittests/ProfileData/MemProfTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index a8cfd380c4a19..bb37f41cddc86 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -15,7 +15,9 @@
#define LLVM_PROFILEDATA_INSTRPROFWRITER_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Endian.h"
@@ -41,7 +43,7 @@ class InstrProfWriter {
// A map to hold memprof data per function. The lower 64 bits obtained from
// the md5 hash of the function name is used to index into the map.
- memprof::FunctionMemProfMap MemProfData;
+ llvm::MapVector<GlobalValue::GUID, memprof::MemProfRecord> MemProfData;
// An enum describing the attributes of the profile.
InstrProfKind ProfileKind = InstrProfKind::Unknown;
@@ -63,7 +65,8 @@ class InstrProfWriter {
addRecord(std::move(I), 1, Warn);
}
- void addRecord(const ::llvm::memprof::MemProfRecord &MR,
+ void addRecord(const GlobalValue::GUID Id,
+ const memprof::MemProfRecord &Record,
function_ref<void(Error)> Warn);
/// Merge existing function counts from the given writer.
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index 1436ea2a32938..3055b38ebe342 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -82,9 +82,9 @@ struct PortableMemInfoBlock {
// Print out the contents of the MemInfoBlock in YAML format.
void printYAML(raw_ostream &OS) const {
- OS << " MemInfoBlock:\n";
+ OS << " MemInfoBlock:\n";
#define MIBEntryDef(NameTag, Name, Type) \
- OS << " " << #Name << ": " << Name << "\n";
+ OS << " " << #Name << ": " << Name << "\n";
#include "llvm/ProfileData/MIBEntryDef.inc"
#undef MIBEntryDef
}
@@ -133,6 +133,7 @@ struct PortableMemInfoBlock {
#undef MIBEntryDef
};
+// Holds the memprof profile information for a function.
struct MemProfRecord {
// Describes a call frame for a dynamic allocation context. The contents of
// the frame are populated by symbolizing the stack depot call frame from the
@@ -193,64 +194,152 @@ struct MemProfRecord {
return sizeof(Frame::Function) + sizeof(Frame::LineOffset) +
sizeof(Frame::Column) + sizeof(Frame::IsInlineFrame);
}
+
+ // Print the frame information in YAML format.
+ void printYAML(raw_ostream &OS) const {
+ OS << " -\n"
+ << " Function: " << Function << "\n"
+ << " LineOffset: " << LineOffset << "\n"
+ << " Column: " << Column << "\n"
+ << " Inline: " << IsInlineFrame << "\n";
+ }
};
- // The dynamic calling context for the allocation.
- llvm::SmallVector<Frame> CallStack;
- // The statistics obtained from the runtime for the allocation.
- PortableMemInfoBlock Info;
+ struct AllocationInfo {
+ // The dynamic calling context for the allocation.
+ llvm::SmallVector<Frame> CallStack;
+ // The statistics obtained from the runtime for the allocation.
+ PortableMemInfoBlock Info;
+
+ AllocationInfo() = default;
+ AllocationInfo(ArrayRef<Frame> CS, const MemInfoBlock &MB)
+ : CallStack(CS.begin(), CS.end()), Info(MB) {}
+
+ void printYAML(raw_ostream &OS) const {
+ OS << " -\n";
+ OS << " Callstack:\n";
+ // TODO: Print out the frame on one line with to make it easier for deep
+ // callstacks once we have a test to check valid YAML is generated.
+ for (const auto &Frame : CallStack)
+ Frame.printYAML(OS);
+ Info.printYAML(OS);
+ }
+
+ size_t serializedSize() const {
+ return sizeof(uint64_t) + // The number of frames to serialize.
+ Frame::serializedSize() *
+ CallStack.size() + // The contents of the frames.
+ PortableMemInfoBlock::serializedSize(); // The size of the payload.
+ }
+
+ bool operator==(const AllocationInfo &Other) const {
+ if (Other.Info != Info)
+ return false;
+
+ if (Other.CallStack.size() != CallStack.size())
+ return false;
+
+ for (size_t J = 0; J < Other.CallStack.size(); J++) {
+ if (Other.CallStack[J] != CallStack[J])
+ return false;
+ }
+ return true;
+ }
+
+ bool operator!=(const AllocationInfo &Other) const {
+ return !operator==(Other);
+ }
+ };
+
+ // Memory allocation sites in this function for which we have memory profiling
+ // data.
+ llvm::SmallVector<AllocationInfo> AllocSites;
+ // Holds call sites in this function which are part of some memory allocation
+ // context. We store this as a list of locations, each with its list of
+ // inline locations in bottom-up order i.e. from leaf to root. The inline
+ // location list may include additional entries, users should pick the last
+ // entry in the list with the same function GUID.
+ llvm::SmallVector<llvm::SmallVector<Frame>> CallSites;
void clear() {
- CallStack.clear();
- Info.clear();
+ AllocSites.clear();
+ CallSites.clear();
+ }
+
+ void merge(const MemProfRecord &Other) {
+ // TODO: Filter out duplicates which may occur if multiple memprof profiles
+ // are merged together using llvm-profdata.
+ AllocSites.append(Other.AllocSites);
+ CallSites.append(Other.CallSites);
}
size_t serializedSize() const {
- return sizeof(uint64_t) + // The number of frames to serialize.
- Frame::serializedSize() *
- CallStack.size() + // The contents of the frames.
- PortableMemInfoBlock::serializedSize(); // The size of the payload.
+ size_t Result = sizeof(GlobalValue::GUID);
+ for (const AllocationInfo &N : AllocSites)
+ Result += N.serializedSize();
+
+ // The number of callsites we have information for.
+ Result += sizeof(uint64_t);
+ for (const auto &Frames : CallSites) {
+ // The number of frames to serialize.
+ Result += sizeof(uint64_t);
+ for (const Frame &F : Frames)
+ Result += F.serializedSize();
+ }
+ return Result;
}
// Prints out the contents of the memprof record in YAML.
void print(llvm::raw_ostream &OS) const {
- OS << " Callstack:\n";
- // TODO: Print out the frame on one line with to make it easier for deep
- // callstacks once we have a test to check valid YAML is generated.
- for (const auto &Frame : CallStack) {
- OS << " -\n"
- << " Function: " << Frame.Function << "\n"
- << " LineOffset: " << Frame.LineOffset << "\n"
- << " Column: " << Frame.Column << "\n"
- << " Inline: " << Frame.IsInlineFrame << "\n";
+ if (!AllocSites.empty()) {
+ OS << " AllocSites:\n";
+ for (const AllocationInfo &N : AllocSites)
+ N.printYAML(OS);
}
- Info.printYAML(OS);
+ if (!CallSites.empty()) {
+ OS << " CallSites:\n";
+ for (const auto &Frames : CallSites) {
+ for (const auto &F : Frames) {
+ OS << " -\n";
+ F.printYAML(OS);
+ }
+ }
+ }
}
bool operator==(const MemProfRecord &Other) const {
- if (Other.Info != Info)
+ if (Other.AllocSites.size() != AllocSites.size())
return false;
- if (Other.CallStack.size() != CallStack.size())
+ if (Other.CallSites.size() != CallSites.size())
return false;
- for (size_t I = 0; I < Other.CallStack.size(); I++) {
- if (Other.CallStack[I] != CallStack[I])
+ for (size_t I = 0; I < AllocSites.size(); I++) {
+ if (AllocSites[I] != Other.AllocSites[I])
+ return false;
+ }
+
+ for (size_t I = 0; I < CallSites.size(); I++) {
+ if (CallSites[I] != Other.CallSites[I])
return false;
}
return true;
}
-};
-// Serializes the memprof records in \p Records to the ostream \p OS based on
-// the schema provided in \p Schema.
-void serializeRecords(const ArrayRef<MemProfRecord> Records,
- const MemProfSchema &Schema, raw_ostream &OS);
+ // Serializes the memprof records in \p Records to the ostream \p OS based on
+ // the schema provided in \p Schema.
+ void serialize(const MemProfSchema &Schema, raw_ostream &OS);
-// Deserializes memprof records from the Buffer
-SmallVector<MemProfRecord, 4> deserializeRecords(const MemProfSchema &Schema,
- const unsigned char *Buffer);
+ // Deserializes memprof records from the Buffer.
+ static MemProfRecord deserialize(const MemProfSchema &Schema,
+ const unsigned char *Buffer);
+
+ // Returns the GUID for the function name after canonicalization. For memprof,
+ // we remove any .llvm suffix added by LTO. MemProfRecords are mapped to
+ // functions using this GUID.
+ static GlobalValue::GUID getGUID(const StringRef FunctionName);
+};
// Reads a memprof schema from a buffer. All entries in the buffer are
// interpreted as uint64_t. The first entry in the buffer denotes the number of
@@ -259,14 +348,11 @@ SmallVector<MemProfRecord, 4> deserializeRecords(const MemProfSchema &Schema,
// byte past the schema contents.
Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer);
-using FunctionMemProfMap =
- DenseMap<uint64_t, SmallVector<memprof::MemProfRecord, 4>>;
-
/// Trait for lookups into the on-disk hash table for memprof format in the
/// indexed profile.
class MemProfRecordLookupTrait {
public:
- using data_type = ArrayRef<MemProfRecord>;
+ using data_type = const MemProfRecord &;
using internal_key_type = uint64_t;
using external_key_type = uint64_t;
using hash_value_type = uint64_t;
@@ -297,15 +383,15 @@ class MemProfRecordLookupTrait {
data_type ReadData(uint64_t K, const unsigned char *D,
offset_type /*Unused*/) {
- Records = deserializeRecords(Schema, D);
- return Records;
+ Record = MemProfRecord::deserialize(Schema, D);
+ return Record;
}
private:
// Holds the memprof schema used to deserialize records.
MemProfSchema Schema;
// Holds the records from one function deserialized from the indexed format.
- llvm::SmallVector<MemProfRecord, 4> Records;
+ MemProfRecord Record;
};
class MemProfRecordWriterTrait {
@@ -313,8 +399,8 @@ class MemProfRecordWriterTrait {
using key_type = uint64_t;
using key_type_ref = uint64_t;
- using data_type = ArrayRef<MemProfRecord>;
- using data_type_ref = ArrayRef<MemProfRecord>;
+ using data_type = MemProfRecord;
+ using data_type_ref = MemProfRecord &;
using hash_value_type = uint64_t;
using offset_type = uint64_t;
@@ -333,17 +419,9 @@ class MemProfRecordWriterTrait {
using namespace support;
endian::Writer LE(Out, little);
-
offset_type N = sizeof(K);
LE.write<offset_type>(N);
-
- offset_type M = 0;
-
- M += sizeof(uint64_t);
- for (const auto &Record : V) {
- M += Record.serializedSize();
- }
-
+ offset_type M = V.serializedSize();
LE.write<offset_type>(M);
return std::make_pair(N, M);
}
@@ -357,7 +435,7 @@ class MemProfRecordWriterTrait {
void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
offset_type /*Unused*/) {
assert(Schema != nullptr && "MemProf schema is not initialized!");
- serializeRecords(V, *Schema, Out);
+ V.serialize(*Schema, Out);
}
};
diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h
index 944d71386a4b2..872a71fd5cf56 100644
--- a/llvm/include/llvm/ProfileData/RawMemProfReader.h
+++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h
@@ -14,9 +14,11 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProfReader.h"
@@ -57,15 +59,16 @@ class RawMemProfReader {
static Expected<std::unique_ptr<RawMemProfReader>>
create(const Twine &Path, const StringRef ProfiledBinary);
- Error readNextRecord(MemProfRecord &Record);
-
- using Iterator = InstrProfIterator<MemProfRecord, RawMemProfReader>;
+ using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
+ using Iterator = InstrProfIterator<GuidMemProfRecordPair, RawMemProfReader>;
Iterator end() { return Iterator(); }
Iterator begin() {
- Iter = ProfileData.begin();
+ Iter = FunctionProfileData.begin();
return Iterator(this);
}
+ Error readNextRecord(GuidMemProfRecordPair &GuidRecord);
+
// The RawMemProfReader only holds memory profile information.
InstrProfKind getProfileKind() const { return InstrProfKind::MemProf; }
@@ -75,7 +78,7 @@ class RawMemProfReader {
llvm::MapVector<uint64_t, MemInfoBlock> &Prof,
CallStackMap &SM)
: Symbolizer(std::move(Sym)), SegmentInfo(Seg.begin(), Seg.end()),
- ProfileData(Prof), StackMap(SM) {
+ CallstackProfileData(Prof), StackMap(SM) {
// We don't call initialize here since there is no raw profile to read. The
// test should pass in the raw profile as structured data.
@@ -83,6 +86,8 @@ class RawMemProfReader {
// initialized properly.
if (Error E = symbolizeAndFilterStackFrames())
report_fatal_error(std::move(E));
+ if (Error E = mapRawProfileToRecords())
+ report_fatal_error(std::move(E));
}
private:
@@ -96,10 +101,12 @@ class RawMemProfReader {
// symbolize or those that belong to the runtime. For profile entries where
// the entire callstack is pruned, we drop the entry from the profile.
Error symbolizeAndFilterStackFrames();
+ // Construct memprof records for each function and store it in the
+ // `FunctionProfileData` map. A function may have allocation profile data or
+ // callsite data or both.
+ Error mapRawProfileToRecords();
object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
- Error fillRecord(const uint64_t Id, const MemInfoBlock &MIB,
- MemProfRecord &Record);
// Prints aggregate counts for each raw profile parsed from the DataBuffer in
// YAML format.
void printSummaries(raw_ostream &OS) const;
@@ -112,15 +119,15 @@ class RawMemProfReader {
llvm::SmallVector<SegmentEntry, 16> SegmentInfo;
// A map from callstack id (same as key in CallStackMap below) to the heap
// information recorded for that allocation context.
- llvm::MapVector<uint64_t, MemInfoBlock> ProfileData;
+ llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData;
CallStackMap StackMap;
// Cached symbolization from PC to Frame.
llvm::DenseMap<uint64_t, llvm::SmallVector<MemProfRecord::Frame>>
SymbolizedFrame;
- // Iterator to read from the ProfileData MapVector.
- llvm::MapVector<uint64_t, MemInfoBlock>::iterator Iter = ProfileData.end();
+ llvm::MapVector<GlobalValue::GUID, MemProfRecord> FunctionProfileData;
+ llvm::MapVector<GlobalValue::GUID, MemProfRecord>::iterator Iter;
};
} // namespace memprof
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 6c08f83be5dc2..494d563917678 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -253,28 +253,14 @@ void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
Dest.sortValueData();
}
-void InstrProfWriter::addRecord(const memprof::MemProfRecord &MR,
+void InstrProfWriter::addRecord(const Function::GUID Id,
+ const memprof::MemProfRecord &Record,
function_ref<void(Error)> Warn) {
- // Use 0 as a sentinel value since its highly unlikely that the lower 64-bits
- // of a 128 bit md5 hash will be all zeros.
- // TODO: Move this Key frame detection to the contructor to avoid having to
- // scan all the callstacks again when adding a new record.
- uint64_t Key = 0;
- for (auto Iter = MR.CallStack.rbegin(), End = MR.CallStack.rend();
- Iter != End; Iter++) {
- if (!Iter->IsInlineFrame) {
- Key = Iter->Function;
- break;
- }
- }
-
- if (Key == 0) {
- Warn(make_error<InstrProfError>(
- instrprof_error::invalid_prof,
- "could not determine leaf function for memprof record."));
+ auto Result = MemProfData.insert({Id, Record});
+ if (!Result.second) {
+ memprof::MemProfRecord &Existing = Result.first->second;
+ Existing.merge(Record);
}
-
- MemProfData[Key].push_back(MR);
}
void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
@@ -283,9 +269,9 @@ void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
for (auto &Func : I.getValue())
addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn);
- for (auto &I : IPW.MemProfData)
- for (const auto &MR : I.second)
- addRecord(MR, Warn);
+ for (auto &I : IPW.MemProfData) {
+ addRecord(I.first, I.second, Warn);
+ }
}
bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
@@ -415,8 +401,8 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
MemProfWriter->Schema = &Schema;
OnDiskChainedHashTableGenerator<memprof::MemProfRecordWriterTrait>
MemProfGenerator;
- for (const auto &I : MemProfData) {
- // Insert the key (func hash) and value (vector of memprof records).
+ for (auto &I : MemProfData) {
+ // Insert the key (func hash) and value (memprof record).
MemProfGenerator.insert(I.first, I.second);
}
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index cdfc261256c65..92d759a88ea45 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -1,4 +1,6 @@
#include "llvm/ProfileData/MemProf.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
@@ -6,43 +8,76 @@
namespace llvm {
namespace memprof {
-void serializeRecords(const ArrayRef<MemProfRecord> Records,
- const MemProfSchema &Schema, raw_ostream &OS) {
+void MemProfRecord::serialize(const MemProfSchema &Schema, raw_ostream &OS) {
using namespace support;
endian::Writer LE(OS, little);
- LE.write<uint64_t>(Records.size());
- for (const MemProfRecord &MR : Records) {
- LE.write<uint64_t>(MR.CallStack.size());
- for (const MemProfRecord::Frame &F : MR.CallStack) {
+ LE.write<uint64_t>(AllocSites.size());
+ for (const AllocationInfo &N : AllocSites) {
+ LE.write<uint64_t>(N.CallStack.size());
+ for (const Frame &F : N.CallStack)
+ F.serialize(OS);
+ N.Info.serialize(Schema, OS);
+ }
+
+ // Related contexts.
+ LE.write<uint64_t>(CallSites.size());
+ for (const auto &Frames : CallSites) {
+ LE.write<uint64_t>(Frames.size());
+ for (const Frame &F : Frames)
F.serialize(OS);
- }
- MR.Info.serialize(Schema, OS);
}
}
-SmallVector<MemProfRecord, 4> deserializeRecords(const MemProfSchema &Schema,
- const unsigned char *Ptr) {
+MemProfRecord MemProfRecord::deserialize(const MemProfSchema &Schema,
+ const unsigned char *Ptr) {
using namespace support;
- SmallVector<MemProfRecord, 4> Records;
- const uint64_t NumRecords =
- endian::readNext<uint64_t, little, unaligned>(Ptr);
- for (uint64_t I = 0; I < NumRecords; I++) {
- MemProfRecord MR;
+ MemProfRecord Record;
+
+ // Read the meminfo nodes.
+ const uint64_t NumNodes = endian::readNext<uint64_t, little, unaligned>(Ptr);
+ for (uint64_t I = 0; I < NumNodes; I++) {
+ MemProfRecord::AllocationInfo Node;
const uint64_t NumFrames =
endian::readNext<uint64_t, little, unaligned>(Ptr);
for (uint64_t J = 0; J < NumFrames; J++) {
const auto F = MemProfRecord::Frame::deserialize(Ptr);
Ptr += MemProfRecord::Frame::serializedSize();
- MR.CallStack.push_back(F);
+ Node.CallStack.push_back(F);
}
- MR.Info.deserialize(Schema, Ptr);
+ Node.Info.deserialize(Schema, Ptr);
Ptr += PortableMemInfoBlock::serializedSize();
- Records.push_back(MR);
+ Record.AllocSites.push_back(Node);
}
- return Records;
+
+ // Read the callsite information.
+ const uint64_t NumCtxs = endian::readNext<uint64_t, little, unaligned>(Ptr);
+ for (uint64_t J = 0; J < NumCtxs; J++) {
+ const uint64_t NumFrames =
+ endian::readNext<uint64_t, little, unaligned>(Ptr);
+ llvm::SmallVector<Frame> Frames;
+ for (uint64_t K = 0; K < NumFrames; K++) {
+ const auto F = MemProfRecord::Frame::deserialize(Ptr);
+ Ptr += MemProfRecord::Frame::serializedSize();
+ Frames.push_back(F);
+ }
+ Record.CallSites.push_back(Frames);
+ }
+
+ return Record;
+}
+
+GlobalValue::GUID MemProfRecord::getGUID(const StringRef FunctionName) {
+ const auto Pos = FunctionName.find(".llvm.");
+
+ // We use the function guid which we expect to be a uint64_t. At
+ // this time, it is the lower 64 bits of the md5 of the function
+ // name. Any suffix with .llvm. is trimmed since these are added by
+ // thinLTO global promotion. At the time the profile is consumed,
+ // these suffixes will not be present.
+ return Function::getGUID(FunctionName.take_front(Pos));
}
Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index a87410f6e5099..a5b7ea9e728c3 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -14,13 +14,13 @@
#include <cstdint>
#include <type_traits>
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
-#include "llvm/IR/Function.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
@@ -163,11 +163,6 @@ bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
return false;
}
-StringRef trimSuffix(const StringRef Name) {
- const auto Pos = Name.find(".llvm.");
- return Name.take_front(Pos);
-}
-
Error report(Error E, const StringRef Context) {
return joinErrors(createStringError(inconvertibleErrorCode(), Context),
std::move(E));
@@ -233,9 +228,10 @@ void RawMemProfReader::printYAML(raw_ostream &OS) {
printSummaries(OS);
// Print out the merged contents of the profiles.
OS << " Records:\n";
- for (const auto &Record : *this) {
+ for (const auto &Entry : *this) {
OS << " -\n";
- Record.print(OS);
+ OS << " FunctionGUID: " << Entry.first << "\n";
+ Entry.second.print(OS);
}
}
@@ -288,7 +284,90 @@ Error RawMemProfReader::initialize() {
if (Error E = readRawProfile())
return E;
- return symbolizeAndFilterStackFrames();
+ if (Error E = symbolizeAndFilterStackFrames())
+ return E;
+
+ return mapRawProfileToRecords();
+}
+
+Error RawMemProfReader::mapRawProfileToRecords() {
+ // Hold a mapping from function to each callsite location we encounter within
+ // it that is part of some dynamic allocation context. The location is stored
+ // as a pointer to a symbolized list of inline frames.
+ using LocationPtr = const llvm::SmallVector<MemProfRecord::Frame> *;
+ llvm::DenseMap<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
+ PerFunctionCallSites;
+
+ // Convert the raw profile callstack data into memprof records. While doing so
+ // keep track of related contexts so that we can fill these in later.
+ for (const auto &Entry : CallstackProfileData) {
+ const uint64_t StackId = Entry.first;
+
+ auto It = StackMap.find(StackId);
+ if (It == StackMap.end())
+ return make_error<InstrProfError>(
+ instrprof_error::malformed,
+ "memprof callstack record does not contain id: " + Twine(StackId));
+
+ // Construct the symbolized callstack.
+ llvm::SmallVector<MemProfRecord::Frame> Callstack;
+ Callstack.reserve(It->getSecond().size());
+
+ llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
+ for (size_t I = 0; I < Addresses.size(); I++) {
+ const uint64_t Address = Addresses[I];
+ assert(SymbolizedFrame.count(Address) > 0 &&
+ "Address not found in SymbolizedFrame map");
+ const SmallVector<MemProfRecord::Frame> &Frames =
+ SymbolizedFrame[Address];
+
+ assert(!Frames.back().IsInlineFrame &&
+ "The last frame should not be inlined");
+
+ // Record the callsites for each function. Skip the first frame of the
+ // first address since it is the allocation site itself that is recorded
+ // as an alloc site.
+ for (size_t J = 0; J < Frames.size(); J++) {
+ if (I == 0 && J == 0)
+ continue;
+ // We attach the entire bottom-up frame here for the callsite even
+ // though we only need the frames up to and including the frame for
+ // Frames[J].Function. This will enable better deduplication for
+ // compression in the future.
+ PerFunctionCallSites[Frames[J].Function].insert(&Frames);
+ }
+
+ // Add all the frames to the current allocation callstack.
+ Callstack.append(Frames.begin(), Frames.end());
+ }
+
+ // We attach the memprof record to each function bottom-up including the
+ // first non-inline frame.
+ for (size_t I = 0; /*Break out using the condition below*/; I++) {
+ auto Result =
+ FunctionProfileData.insert({Callstack[I].Function, MemProfRecord()});
+ MemProfRecord &Record = Result.first->second;
+ Record.AllocSites.emplace_back(Callstack, Entry.second);
+
+ if (!Callstack[I].IsInlineFrame)
+ break;
+ }
+ }
+
+ // Fill in the related callsites per function.
+ for (auto I = PerFunctionCallSites.begin(), E = PerFunctionCallSites.end();
+ I != E; I++) {
+ const GlobalValue::GUID Id = I->first;
+ // Some functions may have only callsite data and no allocation data. Here
+ // we insert a new entry for callsite data if we need to.
+ auto Result = FunctionProfileData.insert({Id, MemProfRecord()});
+ MemProfRecord &Record = Result.first->second;
+ for (LocationPtr Loc : I->getSecond()) {
+ Record.CallSites.push_back(*Loc);
+ }
+ }
+
+ return Error::success();
}
Error RawMemProfReader::symbolizeAndFilterStackFrames() {
@@ -331,15 +410,10 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames() {
LLVM_DEBUG(
// Print out the name to guid mapping for debugging.
llvm::dbgs() << "FunctionName: " << Frame.FunctionName << " GUID: "
- << Function::getGUID(trimSuffix(Frame.FunctionName))
+ << MemProfRecord::getGUID(Frame.FunctionName)
<< "\n";);
SymbolizedFrame[VAddr].emplace_back(
- // We use the function guid which we expect to be a uint64_t. At
- // this time, it is the lower 64 bits of the md5 of the function
- // name. Any suffix with .llvm. is trimmed since these are added by
- // thinLTO global promotion. At the time the profile is consumed,
- // these suffixes will not be present.
- Function::getGUID(trimSuffix(Frame.FunctionName)),
+ MemProfRecord::getGUID(Frame.FunctionName),
Frame.Line - Frame.StartLine, Frame.Column,
// Only the last entry is not an inlined location.
I != NumFrames - 1);
@@ -359,7 +433,7 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames() {
// Drop the entries where the callstack is empty.
for (const uint64_t Id : EntriesToErase) {
StackMap.erase(Id);
- ProfileData.erase(Id);
+ CallstackProfileData.erase(Id);
}
if (StackMap.empty())
@@ -394,10 +468,10 @@ Error RawMemProfReader::readRawProfile() {
// raw profiles in the same binary file are from the same process so the
// stackdepot ids are the same.
for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
- if (ProfileData.count(Value.first)) {
- ProfileData[Value.first].Merge(Value.second);
+ if (CallstackProfileData.count(Value.first)) {
+ CallstackProfileData[Value.first].Merge(Value.second);
} else {
- ProfileData[Value.first] = Value.second;
+ CallstackProfileData[Value.first] = Value.second;
}
}
@@ -438,29 +512,14 @@ RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
return object::SectionedAddress{VirtualAddress};
}
-Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB,
- MemProfRecord &Record) {
- auto &CallStack = StackMap[Id];
- for (const uint64_t Address : CallStack) {
- assert(SymbolizedFrame.count(Address) &&
- "Address not found in symbolized frame cache.");
- Record.CallStack.append(SymbolizedFrame[Address]);
- }
- Record.Info = PortableMemInfoBlock(MIB);
- return Error::success();
-}
-
-Error RawMemProfReader::readNextRecord(MemProfRecord &Record) {
- if (ProfileData.empty())
+Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
+ if (FunctionProfileData.empty())
return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
- if (Iter == ProfileData.end())
+ if (Iter == FunctionProfileData.end())
return make_error<InstrProfError>(instrprof_error::eof);
- Record.clear();
- if (Error E = fillRecord(Iter->first, Iter->second, Record)) {
- return E;
- }
+ GuidRecord = {Iter->first, Iter->second};
Iter++;
return Error::success();
}
diff --git a/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe
new file mode 100755
index 0000000000000..9b6fd16e9a272
Binary files /dev/null and b/llvm/test/tools/llvm-profdata/Inputs/inline.memprofexe differ
diff --git a/llvm/test/tools/llvm-profdata/memprof-basic.test b/llvm/test/tools/llvm-profdata/memprof-basic.test
index af22c3b6c39b3..e72728af101dd 100644
--- a/llvm/test/tools/llvm-profdata/memprof-basic.test
+++ b/llvm/test/tools/llvm-profdata/memprof-basic.test
@@ -26,7 +26,7 @@ recorded.
```
clang -fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling \
-fmemory-profile -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer \
- -fno-optimize-sibling-calls -m64 -Wl,-build-id source.c -o basic.memprofexe
+ -fno-optimize-sibling-calls -m64 -Wl,-build-id source.c -o basic.memprofexe
env MEMPROF_OPTIONS=log_path=stdout ./rawprofile.out > basic.memprofraw
```
@@ -46,56 +46,59 @@ CHECK-NEXT: NumMibInfo: 3
CHECK-NEXT: NumStackOffsets: 3
CHECK-NEXT: Records:
CHECK-NEXT: -
-CHECK-NEXT: Callstack:
+CHECK-NEXT: FunctionGUID: {{[0-9]+}}
+CHECK-NEXT: AllocSites:
CHECK-NEXT: -
-CHECK-NEXT: Function: {{[0-9]+}}
-CHECK-NEXT: LineOffset: 1
-CHECK-NEXT: Column: 21
-CHECK-NEXT: Inline: 0
-CHECK-NEXT: MemInfoBlock:
-CHECK-NEXT: AllocCount: 1
-CHECK-NEXT: TotalAccessCount: 2
-CHECK-NEXT: MinAccessCount: 2
-CHECK-NEXT: MaxAccessCount: 2
-CHECK-NEXT: TotalSize: 10
-CHECK-NEXT: MinSize: 10
-CHECK-NEXT: MaxSize: 10
-CHECK-NEXT: AllocTimestamp: 986
-CHECK-NEXT: DeallocTimestamp: 986
-CHECK-NEXT: TotalLifetime: 0
-CHECK-NEXT: MinLifetime: 0
-CHECK-NEXT: MaxLifetime: 0
-CHECK-NEXT: AllocCpuId: 56
-CHECK-NEXT: DeallocCpuId: 56
-CHECK-NEXT: NumMigratedCpu: 0
-CHECK-NEXT: NumLifetimeOverlaps: 0
-CHECK-NEXT: NumSameAllocCpu: 0
-CHECK-NEXT: NumSameDeallocCpu: 0
-CHECK-NEXT: DataTypeId: {{[0-9]+}}
-CHECK-NEXT: -
-CHECK-NEXT: Callstack:
+CHECK-NEXT: Callstack:
+CHECK-NEXT: -
+CHECK-NEXT: Function: {{[0-9]+}}
+CHECK-NEXT: LineOffset: 1
+CHECK-NEXT: Column: 21
+CHECK-NEXT: Inline: 0
+CHECK-NEXT: MemInfoBlock:
+CHECK-NEXT: AllocCount: 1
+CHECK-NEXT: TotalAccessCount: 2
+CHECK-NEXT: MinAccessCount: 2
+CHECK-NEXT: MaxAccessCount: 2
+CHECK-NEXT: TotalSize: 10
+CHECK-NEXT: MinSize: 10
+CHECK-NEXT: MaxSize: 10
+CHECK-NEXT: AllocTimestamp: 986
+CHECK-NEXT: DeallocTimestamp: 986
+CHECK-NEXT: TotalLifetime: 0
+CHECK-NEXT: MinLifetime: 0
+CHECK-NEXT: MaxLifetime: 0
+CHECK-NEXT: AllocCpuId: 56
+CHECK-NEXT: DeallocCpuId: 56
+CHECK-NEXT: NumMigratedCpu: 0
+CHECK-NEXT: NumLifetimeOverlaps: 0
+CHECK-NEXT: NumSameAllocCpu: 0
+CHECK-NEXT: NumSameDeallocCpu: 0
+CHECK-NEXT: DataTypeId: {{[0-9]+}}
CHECK-NEXT: -
-CHECK-NEXT: Function: {{[0-9]+}}
-CHECK-NEXT: LineOffset: 5
-CHECK-NEXT: Column: 15
-CHECK-NEXT: Inline: 0
-CHECK-NEXT: MemInfoBlock:
-CHECK-NEXT: AllocCount: 1
-CHECK-NEXT: TotalAccessCount: 2
-CHECK-NEXT: MinAccessCount: 2
-CHECK-NEXT: MaxAccessCount: 2
-CHECK-NEXT: TotalSize: 10
-CHECK-NEXT: MinSize: 10
-CHECK-NEXT: MaxSize: 10
-CHECK-NEXT: AllocTimestamp: 987
-CHECK-NEXT: DeallocTimestamp: 987
-CHECK-NEXT: TotalLifetime: 0
-CHECK-NEXT: MinLifetime: 0
-CHECK-NEXT: MaxLifetime: 0
-CHECK-NEXT: AllocCpuId: 56
-CHECK-NEXT: DeallocCpuId: 56
-CHECK-NEXT: NumMigratedCpu: 0
-CHECK-NEXT: NumLifetimeOverlaps: 0
-CHECK-NEXT: NumSameAllocCpu: 0
-CHECK-NEXT: NumSameDeallocCpu: 0
-CHECK-NEXT: DataTypeId: {{[0-9]+}}
+CHECK-NEXT: Callstack:
+CHECK-NEXT: -
+CHECK-NEXT: Function: {{[0-9]+}}
+CHECK-NEXT: LineOffset: 5
+CHECK-NEXT: Column: 15
+CHECK-NEXT: Inline: 0
+CHECK-NEXT: MemInfoBlock:
+CHECK-NEXT: AllocCount: 1
+CHECK-NEXT: TotalAccessCount: 2
+CHECK-NEXT: MinAccessCount: 2
+CHECK-NEXT: MaxAccessCount: 2
+CHECK-NEXT: TotalSize: 10
+CHECK-NEXT: MinSize: 10
+CHECK-NEXT: MaxSize: 10
+CHECK-NEXT: AllocTimestamp: 987
+CHECK-NEXT: DeallocTimestamp: 987
+CHECK-NEXT: TotalLifetime: 0
+CHECK-NEXT: MinLifetime: 0
+CHECK-NEXT: MaxLifetime: 0
+CHECK-NEXT: AllocCpuId: 56
+CHECK-NEXT: DeallocCpuId: 56
+CHECK-NEXT: NumMigratedCpu: 0
+CHECK-NEXT: NumLifetimeOverlaps: 0
+CHECK-NEXT: NumSameAllocCpu: 0
+CHECK-NEXT: NumSameDeallocCpu: 0
+CHECK-NEXT: DataTypeId: {{[0-9]+}}
diff --git a/llvm/test/tools/llvm-profdata/memprof-inline.test b/llvm/test/tools/llvm-profdata/memprof-inline.test
index ecf4f178a9743..a31903e120c72 100644
--- a/llvm/test/tools/llvm-profdata/memprof-inline.test
+++ b/llvm/test/tools/llvm-profdata/memprof-inline.test
@@ -50,7 +50,7 @@ FunctionName: main GUID: 15822663052811949562
[..omit output here which is checked below..]
```
-RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary %p/Inputs/memprof-inline.exe
+RUN: llvm-profdata show --memory %p/Inputs/inline.memprofraw --profiled-binary %p/Inputs/inline.memprofexe | FileCheck %s
CHECK: MemprofProfile:
CHECK-NEXT: -
@@ -62,45 +62,123 @@ CHECK-NEXT: NumMibInfo: 2
CHECK-NEXT: NumStackOffsets: 2
CHECK-NEXT: Records:
CHECK-NEXT: -
-CHECK-NEXT: Callstack:
+CHECK-NEXT: FunctionGUID: 15505678318020221912
+CHECK-NEXT: AllocSites:
CHECK-NEXT: -
-CHECK-NEXT: Function: 15505678318020221912
-CHECK-NEXT: LineOffset: 1
-CHECK-NEXT: Column: 15
-CHECK-NEXT: Inline: 0
+CHECK-NEXT: Callstack:
+CHECK-NEXT: -
+CHECK-NEXT: Function: 15505678318020221912
+CHECK-NEXT: LineOffset: 1
+CHECK-NEXT: Column: 15
+CHECK-NEXT: Inline: 1
+CHECK-NEXT: -
+CHECK-NEXT: Function: 6699318081062747564
+CHECK-NEXT: LineOffset: 0
+CHECK-NEXT: Column: 18
+CHECK-NEXT: Inline: 0
+CHECK-NEXT: -
+CHECK-NEXT: Function: 16434608426314478903
+CHECK-NEXT: LineOffset: 0
+CHECK-NEXT: Column: 19
+CHECK-NEXT: Inline: 0
+CHECK-NEXT: -
+CHECK-NEXT: Function: 15822663052811949562
+CHECK-NEXT: LineOffset: 1
+CHECK-NEXT: Column: 3
+CHECK-NEXT: Inline: 0
+CHECK-NEXT: MemInfoBlock:
+CHECK-NEXT: AllocCount: 1
+CHECK-NEXT: TotalAccessCount: 1
+CHECK-NEXT: MinAccessCount: 1
+CHECK-NEXT: MaxAccessCount: 1
+CHECK-NEXT: TotalSize: 1
+CHECK-NEXT: MinSize: 1
+CHECK-NEXT: MaxSize: 1
+CHECK-NEXT: AllocTimestamp: 894
+CHECK-NEXT: DeallocTimestamp: 894
+CHECK-NEXT: TotalLifetime: 0
+CHECK-NEXT: MinLifetime: 0
+CHECK-NEXT: MaxLifetime: 0
+CHECK-NEXT: AllocCpuId: 23
+CHECK-NEXT: DeallocCpuId: 23
+CHECK-NEXT: NumMigratedCpu: 0
+CHECK-NEXT: NumLifetimeOverlaps: 0
+CHECK-NEXT: NumSameAllocCpu: 0
+CHECK-NEXT: NumSameDeallocCpu: 0
+CHECK-NEXT: DataTypeId: {{[0-9]+}}
+CHECK-NEXT: -
+CHECK-NEXT: FunctionGUID: 6699318081062747564
+CHECK-NEXT: AllocSites:
CHECK-NEXT: -
-CHECK-NEXT: Function: 6699318081062747564
-CHECK-NEXT: LineOffset: 0
-CHECK-NEXT: Column: 18
-CHECK-NEXT: Inline: 1
+CHECK-NEXT: Callstack:
+CHECK-NEXT: -
+CHECK-NEXT: Function: 15505678318020221912
+CHECK-NEXT: LineOffset: 1
+CHECK-NEXT: Column: 15
+CHECK-NEXT: Inline: 1
+CHECK-NEXT: -
+CHECK-NEXT: Function: 6699318081062747564
+CHECK-NEXT: LineOffset: 0
+CHECK-NEXT: Column: 18
+CHECK-NEXT: Inline: 0
+CHECK-NEXT: -
+CHECK-NEXT: Function: 16434608426314478903
+CHECK-NEXT: LineOffset: 0
+CHECK-NEXT: Column: 19
+CHECK-NEXT: Inline: 0
+CHECK-NEXT: -
+CHECK-NEXT: Function: 15822663052811949562
+CHECK-NEXT: LineOffset: 1
+CHECK-NEXT: Column: 3
+CHECK-NEXT: Inline: 0
+CHECK-NEXT: MemInfoBlock:
+CHECK-NEXT: AllocCount: 1
+CHECK-NEXT: TotalAccessCount: 1
+CHECK-NEXT: MinAccessCount: 1
+CHECK-NEXT: MaxAccessCount: 1
+CHECK-NEXT: TotalSize: 1
+CHECK-NEXT: MinSize: 1
+CHECK-NEXT: MaxSize: 1
+CHECK-NEXT: AllocTimestamp: 894
+CHECK-NEXT: DeallocTimestamp: 894
+CHECK-NEXT: TotalLifetime: 0
+CHECK-NEXT: MinLifetime: 0
+CHECK-NEXT: MaxLifetime: 0
+CHECK-NEXT: AllocCpuId: 23
+CHECK-NEXT: DeallocCpuId: 23
+CHECK-NEXT: NumMigratedCpu: 0
+CHECK-NEXT: NumLifetimeOverlaps: 0
+CHECK-NEXT: NumSameAllocCpu: 0
+CHECK-NEXT: NumSameDeallocCpu: 0
+CHECK-NEXT: DataTypeId: {{[0-9]+}}
+CHECK-NEXT: CallSites:
CHECK-NEXT: -
-CHECK-NEXT: Function: 16434608426314478903
-CHECK-NEXT: LineOffset: 0
-CHECK-NEXT: Column: 19
-CHECK-NEXT: Inline: 0
+CHECK-NEXT: -
+CHECK-NEXT: Function: 15505678318020221912
+CHECK-NEXT: LineOffset: 1
+CHECK-NEXT: Column: 15
+CHECK-NEXT: Inline: 1
CHECK-NEXT: -
-CHECK-NEXT: Function: 15822663052811949562
-CHECK-NEXT: LineOffset: 1
-CHECK-NEXT: Column: 3
-CHECK-NEXT: Inline: 0
-CHECK-NEXT: MemInfoBlock:
-CHECK-NEXT: AllocCount: 1
-CHECK-NEXT: TotalAccessCount: 1
-CHECK-NEXT: MinAccessCount: 1
-CHECK-NEXT: MaxAccessCount: 1
-CHECK-NEXT: TotalSize: 1
-CHECK-NEXT: MinSize: 1
-CHECK-NEXT: MaxSize: 1
-CHECK-NEXT: AllocTimestamp: 894
-CHECK-NEXT: DeallocTimestamp: 894
-CHECK-NEXT: TotalLifetime: 0
-CHECK-NEXT: MinLifetime: 0
-CHECK-NEXT: MaxLifetime: 0
-CHECK-NEXT: AllocCpuId: 23
-CHECK-NEXT: DeallocCpuId: 23
-CHECK-NEXT: NumMigratedCpu: 0
-CHECK-NEXT: NumLifetimeOverlaps: 0
-CHECK-NEXT: NumSameAllocCpu: 0
-CHECK-NEXT: NumSameDeallocCpu: 0
-CHECK-NEXT: DataTypeId: {{[0-9]+}}
-
+CHECK-NEXT: -
+CHECK-NEXT: Function: 6699318081062747564
+CHECK-NEXT: LineOffset: 0
+CHECK-NEXT: Column: 18
+CHECK-NEXT: Inline: 0
+CHECK-NEXT: -
+CHECK-NEXT: FunctionGUID: 15822663052811949562
+CHECK-NEXT: CallSites:
+CHECK-NEXT: -
+CHECK-NEXT: -
+CHECK-NEXT: Function: 15822663052811949562
+CHECK-NEXT: LineOffset: 1
+CHECK-NEXT: Column: 3
+CHECK-NEXT: Inline: 0
+CHECK-NEXT: -
+CHECK-NEXT: FunctionGUID: 16434608426314478903
+CHECK-NEXT: CallSites:
+CHECK-NEXT: -
+CHECK-NEXT: -
+CHECK-NEXT: Function: 16434608426314478903
+CHECK-NEXT: LineOffset: 0
+CHECK-NEXT: Column: 19
+CHECK-NEXT: Inline: 0
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index ba2f1b6038c48..df56a76b1552e 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -267,8 +267,8 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
}
// Add the records into the writer context.
- for (const memprof::MemProfRecord &MR : *Reader) {
- WC->Writer.addRecord(MR, [&](Error E) {
+ for (auto I = Reader->begin(), E = Reader->end(); I != E; ++I) {
+ WC->Writer.addRecord(/*Id=*/I->first, /*Record=*/I->second, [&](Error E) {
instrprof_error IPE = InstrProfError::take(std::move(E));
WC->Errors.emplace_back(make_error<InstrProfError>(IPE), Filename);
});
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 434e6aaee8b02..422492266797e 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -13,6 +13,7 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/InstrProfWriter.h"
#include "llvm/ProfileData/MemProf.h"
+#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/Support/Compression.h"
#include "llvm/Testing/Support/Error.h"
#include "llvm/Testing/Support/SupportHelpers.h"
@@ -222,18 +223,41 @@ TEST_F(InstrProfTest, test_writer_merge) {
ASSERT_EQ(0U, R->Counts[1]);
}
+using ::llvm::memprof::MemInfoBlock;
+using ::llvm::memprof::MemProfRecord;
+MemProfRecord
+makeRecord(std::initializer_list<std::initializer_list<MemProfRecord::Frame>>
+ AllocFrames,
+ std::initializer_list<std::initializer_list<MemProfRecord::Frame>>
+ CallSiteFrames,
+ const MemInfoBlock &Block = MemInfoBlock()) {
+ llvm::memprof::MemProfRecord MR;
+ for (const auto &Frames : AllocFrames)
+ MR.AllocSites.emplace_back(Frames, Block);
+ for (const auto &Frames : CallSiteFrames)
+ MR.CallSites.push_back(Frames);
+ return MR;
+}
+
TEST_F(InstrProfTest, test_memprof) {
ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
Succeeded());
- llvm::memprof::MemProfRecord MR;
- MR.CallStack.push_back({0x123, 1, 2, false});
- MR.CallStack.push_back({0x345, 3, 4, true});
- Writer.addRecord(MR, Err);
+
+ const MemProfRecord MR = makeRecord(
+ /*AllocFrames=*/
+ {
+ {{0x123, 1, 2, false}, {0x345, 3, 4, true}},
+ {{0x125, 5, 6, false}, {0x567, 7, 8, true}},
+ },
+ /*CallSiteFrames=*/{
+ {{0x124, 5, 6, false}, {0x789, 8, 9, true}},
+ });
+ Writer.addRecord(/*Id=*/0x9999, MR, Err);
auto Profile = Writer.writeBuffer();
readProfile(std::move(Profile));
- auto RecordsOr = Reader->getMemProfRecord(0x123);
+ auto RecordsOr = Reader->getMemProfRecord(0x9999);
ASSERT_THAT_ERROR(RecordsOr.takeError(), Succeeded());
const auto Records = RecordsOr.get();
ASSERT_EQ(Records.size(), 1U);
@@ -247,10 +271,16 @@ TEST_F(InstrProfTest, test_memprof_merge) {
ASSERT_THAT_ERROR(Writer2.mergeProfileKind(InstrProfKind::MemProf),
Succeeded());
- llvm::memprof::MemProfRecord MR;
- MR.CallStack.push_back({0x123, 1, 2, false});
- MR.CallStack.push_back({0x345, 3, 4, true});
- Writer2.addRecord(MR, Err);
+ const MemProfRecord MR = makeRecord(
+ /*AllocFrames=*/
+ {
+ {{0x123, 1, 2, false}, {0x345, 3, 4, true}},
+ {{0x125, 5, 6, false}, {0x567, 7, 8, true}},
+ },
+ /*CallSiteFrames=*/{
+ {{0x124, 5, 6, false}, {0x789, 8, 9, true}},
+ });
+ Writer2.addRecord(/*Id=*/0x9999, MR, Err);
ASSERT_THAT_ERROR(Writer.mergeProfileKind(Writer2.getProfileKind()),
Succeeded());
@@ -264,25 +294,13 @@ TEST_F(InstrProfTest, test_memprof_merge) {
ASSERT_EQ(1U, R->Counts.size());
ASSERT_EQ(42U, R->Counts[0]);
- auto RecordsOr = Reader->getMemProfRecord(0x123);
+ auto RecordsOr = Reader->getMemProfRecord(0x9999);
ASSERT_THAT_ERROR(RecordsOr.takeError(), Succeeded());
const auto Records = RecordsOr.get();
ASSERT_EQ(Records.size(), 1U);
EXPECT_EQ(Records[0], MR);
}
-TEST_F(InstrProfTest, test_memprof_invalid_add_record) {
- llvm::memprof::MemProfRecord MR;
- // At least one of the frames should be a non-inline frame.
- MR.CallStack.push_back({0x123, 1, 2, true});
- MR.CallStack.push_back({0x345, 3, 4, true});
-
- auto CheckErr = [](Error &&E) {
- EXPECT_TRUE(ErrorEquals(instrprof_error::invalid_prof, std::move(E)));
- };
- Writer.addRecord(MR, CheckErr);
-}
-
static const char callee1[] = "callee1";
static const char callee2[] = "callee2";
static const char callee3[] = "callee3";
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 8921235d063fb..7f7cd64f54065 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -4,6 +4,7 @@
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Value.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProfData.inc"
@@ -133,6 +134,13 @@ MemProfSchema getFullSchema() {
TEST(MemProf, FillsValue) {
std::unique_ptr<MockSymbolizer> Symbolizer(new MockSymbolizer());
+ EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x1000},
+ specifier(), false))
+ .Times(1) // Only once since we remember invalid PCs.
+ .WillRepeatedly(Return(makeInliningInfo({
+ {"new", 70, 57, 3, "memprof/memprof_new_delete.cpp"},
+ })));
+
EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x2000},
specifier(), false))
.Times(1) // Only once since we cache the result for future lookups.
@@ -141,41 +149,98 @@ TEST(MemProf, FillsValue) {
{"bar", 201, 150, 20},
})));
- EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x6000},
+ EXPECT_CALL(*Symbolizer, symbolizeInlinedCode(SectionedAddress{0x3000},
specifier(), false))
.Times(1)
.WillRepeatedly(Return(makeInliningInfo({
- {"baz", 10, 5, 30},
- {"qux.llvm.12345", 75, 70, 10},
+ {"xyz", 10, 5, 30},
+ {"abc", 10, 5, 30},
})));
CallStackMap CSM;
- CSM[0x1] = {0x2000};
- CSM[0x2] = {0x6000, 0x2000};
+ CSM[0x1] = {0x1000, 0x2000, 0x3000};
llvm::MapVector<uint64_t, MemInfoBlock> Prof;
Prof[0x1].AllocCount = 1;
- Prof[0x2].AllocCount = 2;
auto Seg = makeSegments();
RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM);
- std::vector<MemProfRecord> Records;
- for (const MemProfRecord &R : Reader) {
- Records.push_back(R);
+ llvm::DenseMap<llvm::GlobalValue::GUID, MemProfRecord> Records;
+ for (const auto &Pair : Reader) {
+ Records.insert({Pair.first, Pair.second});
}
- EXPECT_EQ(Records.size(), 2U);
-
- EXPECT_EQ(Records[0].Info.getAllocCount(), 1U);
- EXPECT_EQ(Records[1].Info.getAllocCount(), 2U);
- EXPECT_THAT(Records[0].CallStack[0], FrameContains("foo", 5U, 30U, true));
- EXPECT_THAT(Records[0].CallStack[1], FrameContains("bar", 51U, 20U, false));
- EXPECT_THAT(Records[1].CallStack[0], FrameContains("baz", 5U, 30U, true));
- EXPECT_THAT(Records[1].CallStack[1], FrameContains("qux", 5U, 10U, false));
- EXPECT_THAT(Records[1].CallStack[2], FrameContains("foo", 5U, 30U, true));
- EXPECT_THAT(Records[1].CallStack[3], FrameContains("bar", 51U, 20U, false));
+ // Mock program pseudocode and expected memprof record contents.
+ //
+ // AllocSite CallSite
+ // inline foo() { new(); } Y N
+ // bar() { foo(); } Y Y
+ // inline xyz() { bar(); } N Y
+ // abc() { xyz(); } N Y
+
+ // We expect 4 records. We attach alloc site data to foo and bar, i.e.
+ // all frames bottom up until we find a non-inline frame. We attach call site
+ // data to bar, xyz and abc.
+ ASSERT_EQ(Records.size(), 4U);
+
+ // Check the memprof record for foo.
+ const llvm::GlobalValue::GUID FooId = MemProfRecord::getGUID("foo");
+ ASSERT_EQ(Records.count(FooId), 1U);
+ const MemProfRecord &Foo = Records[FooId];
+ ASSERT_EQ(Foo.AllocSites.size(), 1U);
+ EXPECT_EQ(Foo.AllocSites[0].Info.getAllocCount(), 1U);
+ EXPECT_THAT(Foo.AllocSites[0].CallStack[0],
+ FrameContains("foo", 5U, 30U, true));
+ EXPECT_THAT(Foo.AllocSites[0].CallStack[1],
+ FrameContains("bar", 51U, 20U, false));
+ EXPECT_THAT(Foo.AllocSites[0].CallStack[2],
+ FrameContains("xyz", 5U, 30U, true));
+ EXPECT_THAT(Foo.AllocSites[0].CallStack[3],
+ FrameContains("abc", 5U, 30U, false));
+ EXPECT_TRUE(Foo.CallSites.empty());
+
+ // Check the memprof record for bar.
+ const llvm::GlobalValue::GUID BarId = MemProfRecord::getGUID("bar");
+ ASSERT_EQ(Records.count(BarId), 1U);
+ const MemProfRecord &Bar = Records[BarId];
+ ASSERT_EQ(Bar.AllocSites.size(), 1U);
+ EXPECT_EQ(Bar.AllocSites[0].Info.getAllocCount(), 1U);
+ EXPECT_THAT(Bar.AllocSites[0].CallStack[0],
+ FrameContains("foo", 5U, 30U, true));
+ EXPECT_THAT(Bar.AllocSites[0].CallStack[1],
+ FrameContains("bar", 51U, 20U, false));
+ EXPECT_THAT(Bar.AllocSites[0].CallStack[2],
+ FrameContains("xyz", 5U, 30U, true));
+ EXPECT_THAT(Bar.AllocSites[0].CallStack[3],
+ FrameContains("abc", 5U, 30U, false));
+
+ ASSERT_EQ(Bar.CallSites.size(), 1U);
+ ASSERT_EQ(Bar.CallSites[0].size(), 2U);
+ EXPECT_THAT(Bar.CallSites[0][0], FrameContains("foo", 5U, 30U, true));
+ EXPECT_THAT(Bar.CallSites[0][1], FrameContains("bar", 51U, 20U, false));
+
+ // Check the memprof record for xyz.
+ const llvm::GlobalValue::GUID XyzId = MemProfRecord::getGUID("xyz");
+ ASSERT_EQ(Records.count(XyzId), 1U);
+ const MemProfRecord &Xyz = Records[XyzId];
+ ASSERT_EQ(Xyz.CallSites.size(), 1U);
+ ASSERT_EQ(Xyz.CallSites[0].size(), 2U);
+ // Expect the entire frame even though in practice we only need the first
+ // entry here.
+ EXPECT_THAT(Xyz.CallSites[0][0], FrameContains("xyz", 5U, 30U, true));
+ EXPECT_THAT(Xyz.CallSites[0][1], FrameContains("abc", 5U, 30U, false));
+
+ // Check the memprof record for abc.
+ const llvm::GlobalValue::GUID AbcId = MemProfRecord::getGUID("abc");
+ ASSERT_EQ(Records.count(AbcId), 1U);
+ const MemProfRecord &Abc = Records[AbcId];
+ EXPECT_TRUE(Abc.AllocSites.empty());
+ ASSERT_EQ(Abc.CallSites.size(), 1U);
+ ASSERT_EQ(Abc.CallSites[0].size(), 2U);
+ EXPECT_THAT(Abc.CallSites[0][0], FrameContains("xyz", 5U, 30U, true));
+ EXPECT_THAT(Abc.CallSites[0][1], FrameContains("abc", 5U, 30U, false));
}
TEST(MemProf, PortableWrapper) {
@@ -206,36 +271,33 @@ TEST(MemProf, PortableWrapper) {
TEST(MemProf, RecordSerializationRoundTrip) {
const MemProfSchema Schema = getFullSchema();
- llvm::SmallVector<MemProfRecord, 3> Records;
- MemProfRecord MR;
-
MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000,
/*dealloc_timestamp=*/2000, /*alloc_cpu=*/3,
/*dealloc_cpu=*/4);
- MR.Info = PortableMemInfoBlock(Info);
- MR.CallStack.push_back({0x123, 1, 2, false});
- MR.CallStack.push_back({0x345, 3, 4, false});
- Records.push_back(MR);
+ llvm::SmallVector<llvm::SmallVector<MemProfRecord::Frame>> AllocCallStacks = {
+ {{0x123, 1, 2, false}, {0x345, 3, 4, false}},
+ {{0x123, 1, 2, false}, {0x567, 5, 6, false}}};
- MR.clear();
- MR.Info = PortableMemInfoBlock(Info);
- MR.CallStack.push_back({0x567, 5, 6, false});
- MR.CallStack.push_back({0x789, 7, 8, false});
- Records.push_back(MR);
+ llvm::SmallVector<llvm::SmallVector<MemProfRecord::Frame>> CallSites = {
+ {{0x333, 1, 2, false}, {0x777, 3, 4, true}}};
+
+ MemProfRecord Record;
+ for (const auto &ACS : AllocCallStacks) {
+ // Use the same info block for both allocation sites.
+ Record.AllocSites.emplace_back(ACS, Info);
+ }
+ Record.CallSites.assign(CallSites);
std::string Buffer;
llvm::raw_string_ostream OS(Buffer);
- serializeRecords(Records, Schema, OS);
+ Record.serialize(Schema, OS);
OS.flush();
- const llvm::SmallVector<MemProfRecord, 4> GotRecords = deserializeRecords(
+ const MemProfRecord GotRecord = MemProfRecord::deserialize(
Schema, reinterpret_cast<const unsigned char *>(Buffer.data()));
- ASSERT_TRUE(!GotRecords.empty());
- EXPECT_EQ(GotRecords.size(), Records.size());
- EXPECT_THAT(GotRecords[0], EqualsRecord(Records[0]));
- EXPECT_THAT(GotRecords[1], EqualsRecord(Records[1]));
+ EXPECT_THAT(GotRecord, EqualsRecord(Record));
}
TEST(MemProf, SymbolizationFilter) {
@@ -283,12 +345,15 @@ TEST(MemProf, SymbolizationFilter) {
RawMemProfReader Reader(std::move(Symbolizer), Seg, Prof, CSM);
- std::vector<MemProfRecord> Records;
- for (const MemProfRecord &R : Reader) {
- Records.push_back(R);
+ llvm::SmallVector<MemProfRecord, 1> Records;
+ for (const auto &KeyRecordPair : Reader) {
+ Records.push_back(KeyRecordPair.second);
}
+
ASSERT_EQ(Records.size(), 1U);
- ASSERT_EQ(Records[0].CallStack.size(), 1U);
- EXPECT_THAT(Records[0].CallStack[0], FrameContains("foo", 5U, 30U, false));
+ ASSERT_EQ(Records[0].AllocSites.size(), 1U);
+ ASSERT_EQ(Records[0].AllocSites[0].CallStack.size(), 1U);
+ EXPECT_THAT(Records[0].AllocSites[0].CallStack[0],
+ FrameContains("foo", 5U, 30U, false));
}
} // namespace
More information about the llvm-commits
mailing list