[llvm] [memprof] Move writeMemProf to a separate file (PR #137051)
Kazu Hirata via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 23 13:14:01 PDT 2025
https://github.com/kazutakahirata created https://github.com/llvm/llvm-project/pull/137051
This patch moves writeMemProf and its subroutines to a separate file.
The intent is as follows:
- Reduce the size of InstrProfWriter.cpp.
- Move the subroutines to a separate file because they don't interact
with anything else in InstrProfWriter.cpp.
Remarks:
- The new file is named IndexedMemProfData.cpp without "Writer" in the
name so that we can move the reader code to this file in the future.
- This patch just moves code without changing the function signatures
for now. It might make sense to implement a class encompassing
"serialize" and "deserialize" methods for IndexedMemProfData, but
that's left to subsequent patches.
From 7c45e633e4670ccd3bb10a916dfe144a68040425 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 23 Apr 2025 12:30:32 -0700
Subject: [PATCH] [memprof] Move writeMemProf to a separate file
This patch moves writeMemProf and its subroutines to a separate file.
The intent is as follows:
- Reduce the size of InstrProfWriter.cpp.
- Move the subroutines to a separate file because they don't interact
with anything else in InstrProfWriter.cpp.
Remarks:
- The new file is named IndexedMemProfData.cpp without "Writer" in the
name so that we can move the reader code to this file in the future.
- This patch just moves code without changing the function signatures
for now. It might make sense to implement a class encompassing
"serialize" and "deserialize" methods for IndexedMemProfData, but
that's left to subsequent patches.
---
.../llvm/ProfileData/IndexedMemProfData.h | 23 ++
llvm/lib/ProfileData/CMakeLists.txt | 1 +
llvm/lib/ProfileData/IndexedMemProfData.cpp | 300 ++++++++++++++++++
llvm/lib/ProfileData/InstrProfWriter.cpp | 283 +----------------
4 files changed, 325 insertions(+), 282 deletions(-)
create mode 100644 llvm/include/llvm/ProfileData/IndexedMemProfData.h
create mode 100644 llvm/lib/ProfileData/IndexedMemProfData.cpp
diff --git a/llvm/include/llvm/ProfileData/IndexedMemProfData.h b/llvm/include/llvm/ProfileData/IndexedMemProfData.h
new file mode 100644
index 0000000000000..3c6c329d1c49d
--- /dev/null
+++ b/llvm/include/llvm/ProfileData/IndexedMemProfData.h
@@ -0,0 +1,23 @@
+//===- IndexedMemProfData.h - MemProf format support ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// MemProf data is serialized in writeMemProf provided in this header file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/MemProf.h"
+
+namespace llvm {
+
+// Write the MemProf data to OS.
+Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+ memprof::IndexedVersion MemProfVersionRequested,
+ bool MemProfFullSchema);
+
+} // namespace llvm
diff --git a/llvm/lib/ProfileData/CMakeLists.txt b/llvm/lib/ProfileData/CMakeLists.txt
index 4fa1b76f0a062..eb7c2a3c1a28a 100644
--- a/llvm/lib/ProfileData/CMakeLists.txt
+++ b/llvm/lib/ProfileData/CMakeLists.txt
@@ -1,5 +1,6 @@
add_llvm_component_library(LLVMProfileData
GCOV.cpp
+ IndexedMemProfData.cpp
InstrProf.cpp
InstrProfCorrelator.cpp
InstrProfReader.cpp
diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp
new file mode 100644
index 0000000000000..fb4a891a2eb95
--- /dev/null
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -0,0 +1,300 @@
+//===- IndexedMemProfData.cpp - MemProf format support ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// MemProf data is serialized in writeMemProf provided in this file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/MemProf.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/OnDiskHashTable.h"
+
+namespace llvm {
+
+// Serialize Schema.
+static void writeMemProfSchema(ProfOStream &OS,
+ const memprof::MemProfSchema &Schema) {
+ OS.write(static_cast<uint64_t>(Schema.size()));
+ for (const auto Id : Schema)
+ OS.write(static_cast<uint64_t>(Id));
+}
+
+// Serialize MemProfRecordData. Return RecordTableOffset.
+static uint64_t writeMemProfRecords(
+ ProfOStream &OS,
+ llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
+ &MemProfRecordData,
+ memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
+ llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
+ *MemProfCallStackIndexes = nullptr) {
+ memprof::RecordWriterTrait RecordWriter(Schema, Version,
+ MemProfCallStackIndexes);
+ OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
+ RecordTableGenerator;
+ for (auto &[GUID, Record] : MemProfRecordData) {
+ // Insert the key (func hash) and value (memprof record).
+ RecordTableGenerator.insert(GUID, Record, RecordWriter);
+ }
+ // Release the memory of this MapVector as it is no longer needed.
+ MemProfRecordData.clear();
+
+ // The call to Emit invokes RecordWriterTrait::EmitData which destructs
+ // the memprof record copies owned by the RecordTableGenerator. This works
+ // because the RecordTableGenerator is not used after this point.
+ return RecordTableGenerator.Emit(OS.OS, RecordWriter);
+}
+
+// Serialize MemProfFrameData. Return FrameTableOffset.
+static uint64_t writeMemProfFrames(
+ ProfOStream &OS,
+ llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
+ OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
+ FrameTableGenerator;
+ for (auto &[FrameId, Frame] : MemProfFrameData) {
+ // Insert the key (frame id) and value (frame contents).
+ FrameTableGenerator.insert(FrameId, Frame);
+ }
+ // Release the memory of this MapVector as it is no longer needed.
+ MemProfFrameData.clear();
+
+ return FrameTableGenerator.Emit(OS.OS);
+}
+
+// Serialize MemProfFrameData. Return the mapping from FrameIds to their
+// indexes within the frame array.
+static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
+writeMemProfFrameArray(
+ ProfOStream &OS,
+ llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
+ llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
+ // Mappings from FrameIds to array indexes.
+ llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;
+
+ // Compute the order in which we serialize Frames. The order does not matter
+ // in terms of correctness, but we still compute it for deserialization
+ // performance. Specifically, if we serialize frequently used Frames one
+ // after another, we have better cache utilization. For two Frames that
+ // appear equally frequently, we break a tie by serializing the one that tends
+ // to appear earlier in call stacks. We implement the tie-breaking mechanism
+ // by computing the sum of indexes within call stacks for each Frame. If we
+ // still have a tie, then we just resort to comparing the two FrameIds, which
+ // is just for stability of output.
+ std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
+ FrameIdOrder.reserve(MemProfFrameData.size());
+ for (const auto &[Id, Frame] : MemProfFrameData)
+ FrameIdOrder.emplace_back(Id, &Frame);
+ assert(MemProfFrameData.size() == FrameIdOrder.size());
+ llvm::sort(FrameIdOrder,
+ [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
+ const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
+ const auto &SL = FrameHistogram[L.first];
+ const auto &SR = FrameHistogram[R.first];
+ // Popular FrameIds should come first.
+ if (SL.Count != SR.Count)
+ return SL.Count > SR.Count;
+ // If they are equally popular, then the one that tends to appear
+ // earlier in call stacks should come first.
+ if (SL.PositionSum != SR.PositionSum)
+ return SL.PositionSum < SR.PositionSum;
+ // Compare their FrameIds for sort stability.
+ return L.first < R.first;
+ });
+
+ // Serialize all frames while creating mappings from FrameIds to linear IDs.
+ uint64_t Index = 0;
+ MemProfFrameIndexes.reserve(FrameIdOrder.size());
+ for (const auto &[Id, F] : FrameIdOrder) {
+ F->serialize(OS.OS);
+ MemProfFrameIndexes.insert({Id, Index});
+ ++Index;
+ }
+ assert(MemProfFrameData.size() == Index);
+ assert(MemProfFrameData.size() == MemProfFrameIndexes.size());
+
+ // Release the memory of this MapVector as it is no longer needed.
+ MemProfFrameData.clear();
+
+ return MemProfFrameIndexes;
+}
+
+static uint64_t writeMemProfCallStacks(
+ ProfOStream &OS,
+ llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
+ &MemProfCallStackData) {
+ OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
+ CallStackTableGenerator;
+ for (auto &[CSId, CallStack] : MemProfCallStackData)
+ CallStackTableGenerator.insert(CSId, CallStack);
+ // Release the memory of this MapVector as it is no longer needed.
+ MemProfCallStackData.clear();
+
+ return CallStackTableGenerator.Emit(OS.OS);
+}
+
+static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
+writeMemProfCallStackArray(
+ ProfOStream &OS,
+ llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
+ &MemProfCallStackData,
+ llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
+ &MemProfFrameIndexes,
+ llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram,
+ unsigned &NumElements) {
+ llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
+ MemProfCallStackIndexes;
+
+ memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
+ Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
+ FrameHistogram);
+ for (auto I : Builder.getRadixArray())
+ OS.write32(I);
+ NumElements = Builder.getRadixArray().size();
+ MemProfCallStackIndexes = Builder.takeCallStackPos();
+
+ // Release the memory of this MapVector as it is no longer needed.
+ MemProfCallStackData.clear();
+
+ return MemProfCallStackIndexes;
+}
+
+// Write out MemProf Version2 as follows:
+// uint64_t Version
+// uint64_t RecordTableOffset = RecordTableGenerator.Emit
+// uint64_t FramePayloadOffset = Offset for the frame payload
+// uint64_t FrameTableOffset = FrameTableGenerator.Emit
+// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2)
+// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
+// uint64_t Num schema entries
+// uint64_t Schema entry 0
+// uint64_t Schema entry 1
+// ....
+// uint64_t Schema entry N - 1
+// OnDiskChainedHashTable MemProfRecordData
+// OnDiskChainedHashTable MemProfFrameData
+// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
+static Error writeMemProfV2(ProfOStream &OS,
+ memprof::IndexedMemProfData &MemProfData,
+ bool MemProfFullSchema) {
+ OS.write(memprof::Version2);
+ uint64_t HeaderUpdatePos = OS.tell();
+ OS.write(0ULL); // Reserve space for the memprof record table offset.
+ OS.write(0ULL); // Reserve space for the memprof frame payload offset.
+ OS.write(0ULL); // Reserve space for the memprof frame table offset.
+ OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
+ OS.write(0ULL); // Reserve space for the memprof call stack table offset.
+
+ auto Schema = memprof::getHotColdSchema();
+ if (MemProfFullSchema)
+ Schema = memprof::getFullSchema();
+ writeMemProfSchema(OS, Schema);
+
+ uint64_t RecordTableOffset =
+ writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);
+
+ uint64_t FramePayloadOffset = OS.tell();
+ uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);
+
+ uint64_t CallStackPayloadOffset = OS.tell();
+ uint64_t CallStackTableOffset =
+ writeMemProfCallStacks(OS, MemProfData.CallStacks);
+
+ uint64_t Header[] = {
+ RecordTableOffset, FramePayloadOffset, FrameTableOffset,
+ CallStackPayloadOffset, CallStackTableOffset,
+ };
+ OS.patch({{HeaderUpdatePos, Header}});
+
+ return Error::success();
+}
+
+// Write out MemProf Version3 as follows:
+// uint64_t Version
+// uint64_t CallStackPayloadOffset = Offset for the call stack payload
+// uint64_t RecordPayloadOffset = Offset for the record payload
+// uint64_t RecordTableOffset = RecordTableGenerator.Emit
+// uint64_t Num schema entries
+// uint64_t Schema entry 0
+// uint64_t Schema entry 1
+// ....
+// uint64_t Schema entry N - 1
+// Frames serialized one after another
+// Call stacks encoded as a radix tree
+// OnDiskChainedHashTable MemProfRecordData
+static Error writeMemProfV3(ProfOStream &OS,
+ memprof::IndexedMemProfData &MemProfData,
+ bool MemProfFullSchema) {
+ OS.write(memprof::Version3);
+ uint64_t HeaderUpdatePos = OS.tell();
+ OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
+ OS.write(0ULL); // Reserve space for the memprof record payload offset.
+ OS.write(0ULL); // Reserve space for the memprof record table offset.
+
+ auto Schema = memprof::getHotColdSchema();
+ if (MemProfFullSchema)
+ Schema = memprof::getFullSchema();
+ writeMemProfSchema(OS, Schema);
+
+ llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
+ memprof::computeFrameHistogram(MemProfData.CallStacks);
+ assert(MemProfData.Frames.size() == FrameHistogram.size());
+
+ llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
+ writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);
+
+ uint64_t CallStackPayloadOffset = OS.tell();
+ // The number of elements in the call stack array.
+ unsigned NumElements = 0;
+ llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
+ MemProfCallStackIndexes =
+ writeMemProfCallStackArray(OS, MemProfData.CallStacks,
+ MemProfFrameIndexes, FrameHistogram,
+ NumElements);
+
+ uint64_t RecordPayloadOffset = OS.tell();
+ uint64_t RecordTableOffset =
+ writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
+ &MemProfCallStackIndexes);
+
+ // IndexedMemProfReader::deserializeV3 computes the number of elements in the
+ // call stack array from the difference between CallStackPayloadOffset and
+ // RecordPayloadOffset. Verify that the computation works.
+ assert(CallStackPayloadOffset +
+ NumElements * sizeof(memprof::LinearFrameId) ==
+ RecordPayloadOffset);
+
+ uint64_t Header[] = {
+ CallStackPayloadOffset,
+ RecordPayloadOffset,
+ RecordTableOffset,
+ };
+ OS.patch({{HeaderUpdatePos, Header}});
+
+ return Error::success();
+}
+
+// Write out the MemProf data in a requested version.
+Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+ memprof::IndexedVersion MemProfVersionRequested,
+ bool MemProfFullSchema) {
+ switch (MemProfVersionRequested) {
+ case memprof::Version2:
+ return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
+ case memprof::Version3:
+ return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
+ }
+
+ return make_error<InstrProfError>(
+ instrprof_error::unsupported_version,
+ formatv("MemProf version {} not supported; "
+ "requires version between {} and {}, inclusive",
+ MemProfVersionRequested, memprof::MinimumSupportedVersion,
+ memprof::MaximumSupportedVersion));
+}
+
+} // namespace llvm
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index f1882dc3628cc..2759346935b14 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
+#include "llvm/ProfileData/IndexedMemProfData.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
@@ -23,7 +24,6 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/OnDiskHashTable.h"
#include "llvm/Support/raw_ostream.h"
@@ -449,287 +449,6 @@ static void setSummary(IndexedInstrProf::Summary *TheSummary,
TheSummary->setEntry(I, Res[I]);
}
-// Serialize Schema.
-static void writeMemProfSchema(ProfOStream &OS,
- const memprof::MemProfSchema &Schema) {
- OS.write(static_cast<uint64_t>(Schema.size()));
- for (const auto Id : Schema)
- OS.write(static_cast<uint64_t>(Id));
-}
-
-// Serialize MemProfRecordData. Return RecordTableOffset.
-static uint64_t writeMemProfRecords(
- ProfOStream &OS,
- llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
- &MemProfRecordData,
- memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
- llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
- *MemProfCallStackIndexes = nullptr) {
- memprof::RecordWriterTrait RecordWriter(Schema, Version,
- MemProfCallStackIndexes);
- OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
- RecordTableGenerator;
- for (auto &[GUID, Record] : MemProfRecordData) {
- // Insert the key (func hash) and value (memprof record).
- RecordTableGenerator.insert(GUID, Record, RecordWriter);
- }
- // Release the memory of this MapVector as it is no longer needed.
- MemProfRecordData.clear();
-
- // The call to Emit invokes RecordWriterTrait::EmitData which destructs
- // the memprof record copies owned by the RecordTableGenerator. This works
- // because the RecordTableGenerator is not used after this point.
- return RecordTableGenerator.Emit(OS.OS, RecordWriter);
-}
-
-// Serialize MemProfFrameData. Return FrameTableOffset.
-static uint64_t writeMemProfFrames(
- ProfOStream &OS,
- llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
- OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait>
- FrameTableGenerator;
- for (auto &[FrameId, Frame] : MemProfFrameData) {
- // Insert the key (frame id) and value (frame contents).
- FrameTableGenerator.insert(FrameId, Frame);
- }
- // Release the memory of this MapVector as it is no longer needed.
- MemProfFrameData.clear();
-
- return FrameTableGenerator.Emit(OS.OS);
-}
-
-// Serialize MemProfFrameData. Return the mapping from FrameIds to their
-// indexes within the frame array.
-static llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
-writeMemProfFrameArray(
- ProfOStream &OS,
- llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData,
- llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram) {
- // Mappings from FrameIds to array indexes.
- llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes;
-
- // Compute the order in which we serialize Frames. The order does not matter
- // in terms of correctness, but we still compute it for deserialization
- // performance. Specifically, if we serialize frequently used Frames one
- // after another, we have better cache utilization. For two Frames that
- // appear equally frequently, we break a tie by serializing the one that tends
- // to appear earlier in call stacks. We implement the tie-breaking mechanism
- // by computing the sum of indexes within call stacks for each Frame. If we
- // still have a tie, then we just resort to compare two FrameIds, which is
- // just for stability of output.
- std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
- FrameIdOrder.reserve(MemProfFrameData.size());
- for (const auto &[Id, Frame] : MemProfFrameData)
- FrameIdOrder.emplace_back(Id, &Frame);
- assert(MemProfFrameData.size() == FrameIdOrder.size());
- llvm::sort(FrameIdOrder,
- [&](const std::pair<memprof::FrameId, const memprof::Frame *> &L,
- const std::pair<memprof::FrameId, const memprof::Frame *> &R) {
- const auto &SL = FrameHistogram[L.first];
- const auto &SR = FrameHistogram[R.first];
- // Popular FrameIds should come first.
- if (SL.Count != SR.Count)
- return SL.Count > SR.Count;
- // If they are equally popular, then the one that tends to appear
- // earlier in call stacks should come first.
- if (SL.PositionSum != SR.PositionSum)
- return SL.PositionSum < SR.PositionSum;
- // Compare their FrameIds for sort stability.
- return L.first < R.first;
- });
-
- // Serialize all frames while creating mappings from linear IDs to FrameIds.
- uint64_t Index = 0;
- MemProfFrameIndexes.reserve(FrameIdOrder.size());
- for (const auto &[Id, F] : FrameIdOrder) {
- F->serialize(OS.OS);
- MemProfFrameIndexes.insert({Id, Index});
- ++Index;
- }
- assert(MemProfFrameData.size() == Index);
- assert(MemProfFrameData.size() == MemProfFrameIndexes.size());
-
- // Release the memory of this MapVector as it is no longer needed.
- MemProfFrameData.clear();
-
- return MemProfFrameIndexes;
-}
-
-static uint64_t writeMemProfCallStacks(
- ProfOStream &OS,
- llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
- &MemProfCallStackData) {
- OnDiskChainedHashTableGenerator<memprof::CallStackWriterTrait>
- CallStackTableGenerator;
- for (auto &[CSId, CallStack] : MemProfCallStackData)
- CallStackTableGenerator.insert(CSId, CallStack);
- // Release the memory of this vector as it is no longer needed.
- MemProfCallStackData.clear();
-
- return CallStackTableGenerator.Emit(OS.OS);
-}
-
-static llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
-writeMemProfCallStackArray(
- ProfOStream &OS,
- llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
- &MemProfCallStackData,
- llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId>
- &MemProfFrameIndexes,
- llvm::DenseMap<memprof::FrameId, memprof::FrameStat> &FrameHistogram,
- unsigned &NumElements) {
- llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
- MemProfCallStackIndexes;
-
- memprof::CallStackRadixTreeBuilder<memprof::FrameId> Builder;
- Builder.build(std::move(MemProfCallStackData), &MemProfFrameIndexes,
- FrameHistogram);
- for (auto I : Builder.getRadixArray())
- OS.write32(I);
- NumElements = Builder.getRadixArray().size();
- MemProfCallStackIndexes = Builder.takeCallStackPos();
-
- // Release the memory of this vector as it is no longer needed.
- MemProfCallStackData.clear();
-
- return MemProfCallStackIndexes;
-}
-
-// Write out MemProf Version2 as follows:
-// uint64_t Version
-// uint64_t RecordTableOffset = RecordTableGenerator.Emit
-// uint64_t FramePayloadOffset = Offset for the frame payload
-// uint64_t FrameTableOffset = FrameTableGenerator.Emit
-// uint64_t CallStackPayloadOffset = Offset for the call stack payload (NEW V2)
-// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit (NEW in V2)
-// uint64_t Num schema entries
-// uint64_t Schema entry 0
-// uint64_t Schema entry 1
-// ....
-// uint64_t Schema entry N - 1
-// OnDiskChainedHashTable MemProfRecordData
-// OnDiskChainedHashTable MemProfFrameData
-// OnDiskChainedHashTable MemProfCallStackData (NEW in V2)
-static Error writeMemProfV2(ProfOStream &OS,
- memprof::IndexedMemProfData &MemProfData,
- bool MemProfFullSchema) {
- OS.write(memprof::Version2);
- uint64_t HeaderUpdatePos = OS.tell();
- OS.write(0ULL); // Reserve space for the memprof record table offset.
- OS.write(0ULL); // Reserve space for the memprof frame payload offset.
- OS.write(0ULL); // Reserve space for the memprof frame table offset.
- OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
- OS.write(0ULL); // Reserve space for the memprof call stack table offset.
-
- auto Schema = memprof::getHotColdSchema();
- if (MemProfFullSchema)
- Schema = memprof::getFullSchema();
- writeMemProfSchema(OS, Schema);
-
- uint64_t RecordTableOffset =
- writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version2);
-
- uint64_t FramePayloadOffset = OS.tell();
- uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.Frames);
-
- uint64_t CallStackPayloadOffset = OS.tell();
- uint64_t CallStackTableOffset =
- writeMemProfCallStacks(OS, MemProfData.CallStacks);
-
- uint64_t Header[] = {
- RecordTableOffset, FramePayloadOffset, FrameTableOffset,
- CallStackPayloadOffset, CallStackTableOffset,
- };
- OS.patch({{HeaderUpdatePos, Header}});
-
- return Error::success();
-}
-
-// Write out MemProf Version3 as follows:
-// uint64_t Version
-// uint64_t CallStackPayloadOffset = Offset for the call stack payload
-// uint64_t RecordPayloadOffset = Offset for the record payload
-// uint64_t RecordTableOffset = RecordTableGenerator.Emit
-// uint64_t Num schema entries
-// uint64_t Schema entry 0
-// uint64_t Schema entry 1
-// ....
-// uint64_t Schema entry N - 1
-// Frames serialized one after another
-// Call stacks encoded as a radix tree
-// OnDiskChainedHashTable MemProfRecordData
-static Error writeMemProfV3(ProfOStream &OS,
- memprof::IndexedMemProfData &MemProfData,
- bool MemProfFullSchema) {
- OS.write(memprof::Version3);
- uint64_t HeaderUpdatePos = OS.tell();
- OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
- OS.write(0ULL); // Reserve space for the memprof record payload offset.
- OS.write(0ULL); // Reserve space for the memprof record table offset.
-
- auto Schema = memprof::getHotColdSchema();
- if (MemProfFullSchema)
- Schema = memprof::getFullSchema();
- writeMemProfSchema(OS, Schema);
-
- llvm::DenseMap<memprof::FrameId, memprof::FrameStat> FrameHistogram =
- memprof::computeFrameHistogram(MemProfData.CallStacks);
- assert(MemProfData.Frames.size() == FrameHistogram.size());
-
- llvm::DenseMap<memprof::FrameId, memprof::LinearFrameId> MemProfFrameIndexes =
- writeMemProfFrameArray(OS, MemProfData.Frames, FrameHistogram);
-
- uint64_t CallStackPayloadOffset = OS.tell();
- // The number of elements in the call stack array.
- unsigned NumElements = 0;
- llvm::DenseMap<memprof::CallStackId, memprof::LinearCallStackId>
- MemProfCallStackIndexes =
- writeMemProfCallStackArray(OS, MemProfData.CallStacks,
- MemProfFrameIndexes, FrameHistogram,
- NumElements);
-
- uint64_t RecordPayloadOffset = OS.tell();
- uint64_t RecordTableOffset =
- writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
- &MemProfCallStackIndexes);
-
- // IndexedMemProfReader::deserializeV3 computes the number of elements in the
- // call stack array from the difference between CallStackPayloadOffset and
- // RecordPayloadOffset. Verify that the computation works.
- assert(CallStackPayloadOffset +
- NumElements * sizeof(memprof::LinearFrameId) ==
- RecordPayloadOffset);
-
- uint64_t Header[] = {
- CallStackPayloadOffset,
- RecordPayloadOffset,
- RecordTableOffset,
- };
- OS.patch({{HeaderUpdatePos, Header}});
-
- return Error::success();
-}
-
-// Write out the MemProf data in a requested version.
-static Error writeMemProf(ProfOStream &OS,
- memprof::IndexedMemProfData &MemProfData,
- memprof::IndexedVersion MemProfVersionRequested,
- bool MemProfFullSchema) {
- switch (MemProfVersionRequested) {
- case memprof::Version2:
- return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
- case memprof::Version3:
- return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
- }
-
- return make_error<InstrProfError>(
- instrprof_error::unsupported_version,
- formatv("MemProf version {} not supported; "
- "requires version between {} and {}, inclusive",
- MemProfVersionRequested, memprof::MinimumSupportedVersion,
- memprof::MaximumSupportedVersion));
-}
-
uint64_t InstrProfWriter::writeHeader(const IndexedInstrProf::Header &Header,
const bool WritePrevVersion,
ProfOStream &OS) {
More information about the llvm-commits
mailing list