[llvm] [MemProf] Add basic summary section support (PR #141805)
Teresa Johnson via llvm-commits
llvm-commits at lists.llvm.org
Wed May 28 11:47:24 PDT 2025
https://github.com/teresajohnson updated https://github.com/llvm/llvm-project/pull/141805
>From 32a632607f1b433de302a3fddd2def0bc3fcb127 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Tue, 27 May 2025 16:35:17 -0700
Subject: [PATCH 1/3] [MemProf] Add basic summary section support
This patch adds support for a basic MemProf summary section, which is
built along with the indexed MemProf profile (e.g. when reading the raw
or YAML profiles), and serialized through the indexed profile just after
the header.
Currently only 6 fields are written, specifically the number of contexts
(total, cold, hot), and the max context size (cold, warm, hot).
To support forwards and backwards compatibility for added fields in the
indexed profile, the number of fields is serialized first. The code is
written to support forwards compatibility (reading newer profiles with
additional summary fields), and comments indicate how to implement
backwards compatibility (reading older profiles with fewer summary
fields) as needed.
Support is added to print the summary as YAML comments when displaying
both the raw and indexed profiles via `llvm-profdata show`. Because they
are YAML comments, the YAML reader ignores these (the summary is always
recomputed when building the indexed profile as described above).
This necessitated moving some options and a couple of interfaces out of
Analysis/MemoryProfileInfo.cpp and into the new
ProfileData/MemProfSummary.cpp file, as we need to classify context
hotness earlier and also compute context ids to build the summary from
older indexed profiles.
---
.../include/llvm/Analysis/MemoryProfileInfo.h | 5 -
.../llvm/ProfileData/IndexedMemProfData.h | 5 +-
.../llvm/ProfileData/InstrProfReader.h | 10 ++
.../llvm/ProfileData/InstrProfWriter.h | 5 +
.../include/llvm/ProfileData/MemProfSummary.h | 70 +++++++++
.../llvm/ProfileData/MemProfSummaryBuilder.h | 47 ++++++
llvm/lib/Analysis/MemoryProfileInfo.cpp | 50 ------
llvm/lib/ProfileData/CMakeLists.txt | 2 +
llvm/lib/ProfileData/IndexedMemProfData.cpp | 25 ++-
llvm/lib/ProfileData/InstrProfWriter.cpp | 18 ++-
llvm/lib/ProfileData/MemProfReader.cpp | 7 +
llvm/lib/ProfileData/MemProfSummary.cpp | 148 ++++++++++++++++++
.../lib/ProfileData/MemProfSummaryBuilder.cpp | 61 ++++++++
.../Instrumentation/MemProfiler.cpp | 14 +-
llvm/test/Transforms/PGOProfile/memprof.ll | 11 ++
.../memprof_undrift_missing_leaf.ll | 2 +
.../tools/llvm-profdata/memprof-yaml.test | 24 +++
llvm/tools/llvm-profdata/llvm-profdata.cpp | 14 ++
.../Analysis/MemoryProfileInfoTest.cpp | 71 +--------
llvm/unittests/ProfileData/MemProfTest.cpp | 77 +++++++++
20 files changed, 513 insertions(+), 153 deletions(-)
create mode 100644 llvm/include/llvm/ProfileData/MemProfSummary.h
create mode 100644 llvm/include/llvm/ProfileData/MemProfSummaryBuilder.h
create mode 100644 llvm/lib/ProfileData/MemProfSummary.cpp
create mode 100644 llvm/lib/ProfileData/MemProfSummaryBuilder.cpp
diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index bf1cfb1ee52bb..d0a9d0b169e99 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -21,11 +21,6 @@
namespace llvm {
namespace memprof {
-/// Return the allocation type for a given set of memory profile values.
-LLVM_ABI AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity,
- uint64_t AllocCount,
- uint64_t TotalLifetime);
-
/// Build callstack metadata from the provided list of call stack ids. Returns
/// the resulting metadata node.
LLVM_ABI MDNode *buildCallstackMetadata(ArrayRef<uint64_t> CallStack,
diff --git a/llvm/include/llvm/ProfileData/IndexedMemProfData.h b/llvm/include/llvm/ProfileData/IndexedMemProfData.h
index 2b40094a9bc21..304b1c4734af6 100644
--- a/llvm/include/llvm/ProfileData/IndexedMemProfData.h
+++ b/llvm/include/llvm/ProfileData/IndexedMemProfData.h
@@ -24,6 +24,7 @@
namespace llvm {
namespace memprof {
+class MemProfSummary;
struct IndexedMemProfData {
// A map to hold memprof data per function. The lower 64 bits obtained from
// the md5 hash of the function name is used to index into the map.
@@ -89,7 +90,7 @@ struct IndexedMemProfData {
Error writeMemProf(
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
- std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData);
-
+ std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData,
+ memprof::MemProfSummary *MemProfSum);
} // namespace llvm
#endif
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index d104ab51430d1..99ea3c1808f5e 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -22,6 +22,7 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfCorrelator.h"
#include "llvm/ProfileData/MemProf.h"
+#include "llvm/ProfileData/MemProfSummary.h"
#include "llvm/ProfileData/MemProfYAML.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
@@ -690,6 +691,8 @@ class IndexedMemProfReader {
/// The MemProf version.
memprof::IndexedVersion Version =
static_cast<memprof::IndexedVersion>(memprof::MinimumSupportedVersion);
+ /// MemProf summary (if available, version >= 4).
+ std::unique_ptr<memprof::MemProfSummary> MemProfSum;
/// MemProf profile schema (if available).
memprof::MemProfSchema Schema;
/// MemProf record profile data on-disk indexed via llvm::md5(FunctionName).
@@ -725,6 +728,8 @@ class IndexedMemProfReader {
// Return the entire MemProf profile.
memprof::AllMemProfData getAllMemProfData() const;
+
+ memprof::MemProfSummary *getSummary() const { return MemProfSum.get(); }
};
/// Reader for the indexed binary instrprof format.
@@ -887,6 +892,11 @@ class IndexedInstrProfReader : public InstrProfReader {
}
}
+ /// Return the MemProf summary. Will be null if unavailable (version < 4).
+ memprof::MemProfSummary *getMemProfSummary() const {
+ return MemProfReader.getSummary();
+ }
+
Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
Error printBinaryIds(raw_ostream &OS) override;
};
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index cdb7afb623378..8bf1efffc7c8c 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -22,6 +22,7 @@
#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/IndexedMemProfData.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/MemProfSummaryBuilder.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <memory>
@@ -84,6 +85,10 @@ class InstrProfWriter {
std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
+ // MemProf summary builder to which records are added as MemProf data is added
+ // to the writer.
+ memprof::MemProfSummaryBuilder MemProfSumBuilder;
+
public:
// For memprof testing, random hotness can be assigned to the contexts if
// MemprofGenerateRandomHotness is enabled. The random seed can be either
diff --git a/llvm/include/llvm/ProfileData/MemProfSummary.h b/llvm/include/llvm/ProfileData/MemProfSummary.h
new file mode 100644
index 0000000000000..c65c04dd1f85c
--- /dev/null
+++ b/llvm/include/llvm/ProfileData/MemProfSummary.h
@@ -0,0 +1,70 @@
+//===- MemProfSummary.h - MemProf summary support ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains MemProf summary support and related interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PROFILEDATA_MEMPROFSUMMARY_H
+#define LLVM_PROFILEDATA_MEMPROFSUMMARY_H
+
+#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/MemProf.h"
+
+namespace llvm {
+namespace memprof {
+
+/// Return the allocation type for a given set of memory profile values.
+AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity,
+ uint64_t AllocCount, uint64_t TotalLifetime);
+
+/// Helper to generate a single hash id for a given callstack, used for emitting
+/// matching statistics and useful for uniquing such statistics across modules.
+/// Also used to dedup contexts when computing the summary.
+uint64_t computeFullStackId(ArrayRef<Frame> CallStack);
+
+class MemProfSummary {
+private:
+ /// The number of summary fields below, which is used to enable some forwards
+ /// and backwards compatibility for the summary when serialized in the indexed
+ /// MemProf format. As long as no existing summary fields are removed or
+ /// reordered, and new summary fields are added after existing summary fields,
+ /// the MemProf indexed profile version does not need to be bumped to
+ /// accommodate new summary fields.
+ static const unsigned NumSummaryFields = 6;
+
+ const uint64_t NumContexts, NumColdContexts, NumHotContexts;
+ const uint64_t MaxColdTotalSize, MaxWarmTotalSize, MaxHotTotalSize;
+
+public:
+ MemProfSummary(uint64_t NumContexts, uint64_t NumColdContexts,
+ uint64_t NumHotContexts, uint64_t MaxColdTotalSize,
+ uint64_t MaxWarmTotalSize, uint64_t MaxHotTotalSize)
+ : NumContexts(NumContexts), NumColdContexts(NumColdContexts),
+ NumHotContexts(NumHotContexts), MaxColdTotalSize(MaxColdTotalSize),
+ MaxWarmTotalSize(MaxWarmTotalSize), MaxHotTotalSize(MaxHotTotalSize) {}
+
+ static unsigned getNumSummaryFields() { return NumSummaryFields; }
+ uint64_t getNumContexts() const { return NumContexts; }
+ uint64_t getNumColdContexts() const { return NumColdContexts; }
+ uint64_t getNumHotContexts() const { return NumHotContexts; }
+ uint64_t getMaxColdTotalSize() const { return MaxColdTotalSize; }
+ uint64_t getMaxWarmTotalSize() const { return MaxWarmTotalSize; }
+ uint64_t getMaxHotTotalSize() const { return MaxHotTotalSize; }
+ void printSummaryYaml(raw_ostream &OS) const;
+ /// Write to indexed MemProf profile.
+ void write(ProfOStream &OS) const;
+ /// Read from indexed MemProf profile.
+ static std::unique_ptr<MemProfSummary> deserialize(const unsigned char *&);
+};
+
+} // namespace memprof
+} // namespace llvm
+
+#endif // LLVM_PROFILEDATA_MEMPROFSUMMARY_H
diff --git a/llvm/include/llvm/ProfileData/MemProfSummaryBuilder.h b/llvm/include/llvm/ProfileData/MemProfSummaryBuilder.h
new file mode 100644
index 0000000000000..61cc46cfbc214
--- /dev/null
+++ b/llvm/include/llvm/ProfileData/MemProfSummaryBuilder.h
@@ -0,0 +1,47 @@
+//===- MemProfSummaryBuilder.h - MemProf summary building -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains MemProf summary builder.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PROFILEDATA_MEMPROFSUMMARYBUILDER_H
+#define LLVM_PROFILEDATA_MEMPROFSUMMARYBUILDER_H
+
+#include "llvm/ProfileData/MemProf.h"
+#include "llvm/ProfileData/MemProfSummary.h"
+
+namespace llvm {
+namespace memprof {
+
+class MemProfSummaryBuilder {
+private:
+ DenseSet<uint64_t> Contexts;
+ void addRecord(uint64_t, const PortableMemInfoBlock &);
+
+protected:
+ uint64_t MaxColdTotalSize = 0;
+ uint64_t MaxWarmTotalSize = 0;
+ uint64_t MaxHotTotalSize = 0;
+ uint64_t NumContexts = 0;
+ uint64_t NumColdContexts = 0;
+ uint64_t NumHotContexts = 0;
+
+public:
+ MemProfSummaryBuilder() = default;
+ ~MemProfSummaryBuilder() = default;
+
+ void addRecord(const IndexedMemProfRecord &);
+ void addRecord(const MemProfRecord &);
+ std::unique_ptr<MemProfSummary> getSummary();
+};
+
+} // namespace memprof
+} // namespace llvm
+
+#endif // LLVM_PROFILEDATA_MEMPROFSUMMARYBUILDER_H
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index 773d0b2f53e09..263241c3edf5a 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -21,34 +21,6 @@ using namespace llvm::memprof;
#define DEBUG_TYPE "memory-profile-info"
-// Upper bound on lifetime access density (accesses per byte per lifetime sec)
-// for marking an allocation cold.
-LLVM_ABI cl::opt<float> MemProfLifetimeAccessDensityColdThreshold(
- "memprof-lifetime-access-density-cold-threshold", cl::init(0.05),
- cl::Hidden,
- cl::desc("The threshold the lifetime access density (accesses per byte per "
- "lifetime sec) must be under to consider an allocation cold"));
-
-// Lower bound on lifetime to mark an allocation cold (in addition to accesses
-// per byte per sec above). This is to avoid pessimizing short lived objects.
-LLVM_ABI cl::opt<unsigned> MemProfAveLifetimeColdThreshold(
- "memprof-ave-lifetime-cold-threshold", cl::init(200), cl::Hidden,
- cl::desc("The average lifetime (s) for an allocation to be considered "
- "cold"));
-
-// Lower bound on average lifetime accesses density (total life time access
-// density / alloc count) for marking an allocation hot.
-LLVM_ABI cl::opt<unsigned> MemProfMinAveLifetimeAccessDensityHotThreshold(
- "memprof-min-ave-lifetime-access-density-hot-threshold", cl::init(1000),
- cl::Hidden,
- cl::desc("The minimum TotalLifetimeAccessDensity / AllocCount for an "
- "allocation to be considered hot"));
-
-LLVM_ABI cl::opt<bool>
- MemProfUseHotHints("memprof-use-hot-hints", cl::init(false), cl::Hidden,
- cl::desc("Enable use of hot hints (only supported for "
- "unambigously hot allocations)"));
-
cl::opt<bool> MemProfReportHintedSizes(
"memprof-report-hinted-sizes", cl::init(false), cl::Hidden,
cl::desc("Report total allocation sizes of hinted allocations"));
@@ -73,28 +45,6 @@ cl::opt<unsigned> MinCallsiteColdBytePercent(
cl::desc("Min percent of cold bytes at a callsite to discard non-cold "
"contexts"));
-AllocationType llvm::memprof::getAllocType(uint64_t TotalLifetimeAccessDensity,
- uint64_t AllocCount,
- uint64_t TotalLifetime) {
- // The access densities are multiplied by 100 to hold 2 decimal places of
- // precision, so need to divide by 100.
- if (((float)TotalLifetimeAccessDensity) / AllocCount / 100 <
- MemProfLifetimeAccessDensityColdThreshold
- // Lifetime is expected to be in ms, so convert the threshold to ms.
- && ((float)TotalLifetime) / AllocCount >=
- MemProfAveLifetimeColdThreshold * 1000)
- return AllocationType::Cold;
-
- // The access densities are multiplied by 100 to hold 2 decimal places of
- // precision, so need to divide by 100.
- if (MemProfUseHotHints &&
- ((float)TotalLifetimeAccessDensity) / AllocCount / 100 >
- MemProfMinAveLifetimeAccessDensityHotThreshold)
- return AllocationType::Hot;
-
- return AllocationType::NotCold;
-}
-
MDNode *llvm::memprof::buildCallstackMetadata(ArrayRef<uint64_t> CallStack,
LLVMContext &Ctx) {
SmallVector<Metadata *, 8> StackVals;
diff --git a/llvm/lib/ProfileData/CMakeLists.txt b/llvm/lib/ProfileData/CMakeLists.txt
index ca9ea3205ee1d..b1a0d8d707287 100644
--- a/llvm/lib/ProfileData/CMakeLists.txt
+++ b/llvm/lib/ProfileData/CMakeLists.txt
@@ -10,6 +10,8 @@ add_llvm_component_library(LLVMProfileData
MemProf.cpp
MemProfReader.cpp
MemProfRadixTree.cpp
+ MemProfSummary.cpp
+ MemProfSummaryBuilder.cpp
PGOCtxProfReader.cpp
PGOCtxProfWriter.cpp
ProfileSummaryBuilder.cpp
diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp
index 7398e4c468bbe..25786cc6969d6 100644
--- a/llvm/lib/ProfileData/IndexedMemProfData.cpp
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -15,6 +15,7 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfRadixTree.h"
+#include "llvm/ProfileData/MemProfSummary.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/OnDiskHashTable.h"
@@ -220,7 +221,8 @@ static Error writeMemProfRadixTreeBased(
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
memprof::IndexedVersion Version, bool MemProfFullSchema,
std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData =
- nullptr) {
+ nullptr,
+ memprof::MemProfSummary *MemProfSum = nullptr) {
assert((Version == memprof::Version3 || Version == memprof::Version4) &&
"Unsupported version for radix tree format");
@@ -232,6 +234,9 @@ static Error writeMemProfRadixTreeBased(
if (Version >= memprof::Version4)
OS.write(0ULL); // Reserve space for the data access profile offset.
+ if (Version == memprof::Version4)
+ MemProfSum->write(OS);
+
auto Schema = memprof::getHotColdSchema();
if (MemProfFullSchema)
Schema = memprof::getFullSchema();
@@ -297,17 +302,19 @@ static Error writeMemProfV3(ProfOStream &OS,
static Error writeMemProfV4(
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
bool MemProfFullSchema,
- std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData) {
- return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version4,
- MemProfFullSchema,
- std::move(DataAccessProfileData));
+ std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData,
+ memprof::MemProfSummary *MemProfSum) {
+ return writeMemProfRadixTreeBased(
+ OS, MemProfData, memprof::Version4, MemProfFullSchema,
+ std::move(DataAccessProfileData), MemProfSum);
}
// Write out the MemProf data in a requested version.
Error writeMemProf(
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
- std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData) {
+ std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData,
+ memprof::MemProfSummary *MemProfSum) {
switch (MemProfVersionRequested) {
case memprof::Version2:
return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
@@ -315,7 +322,7 @@ Error writeMemProf(
return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
case memprof::Version4:
return writeMemProfV4(OS, MemProfData, MemProfFullSchema,
- std::move(DataAccessProfileData));
+ std::move(DataAccessProfileData), MemProfSum);
}
return make_error<InstrProfError>(
@@ -395,9 +402,11 @@ Error IndexedMemProfReader::deserializeRadixTreeBased(
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
uint64_t DataAccessProfOffset = 0;
- if (Version == memprof::Version4)
+ if (Version == memprof::Version4) {
DataAccessProfOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ MemProfSum = memprof::MemProfSummary::deserialize(Ptr);
+ }
// Read the schema.
auto SchemaOr = memprof::readMemProfSchema(Ptr);
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 039e1bc955cd4..7a4981124762c 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -240,6 +240,7 @@ void InstrProfWriter::addMemProfRecord(
Alloc.Info.setTotalLifetime(NewTL);
}
}
+ MemProfSumBuilder.addRecord(NewRecord);
auto [Iter, Inserted] = MemProfData.Records.insert({Id, NewRecord});
// If we inserted a new record then we are done.
if (Inserted) {
@@ -308,11 +309,16 @@ bool InstrProfWriter::addMemProfData(memprof::IndexedMemProfData Incoming,
return false;
// Add one record at a time if randomization is requested.
- if (MemProfData.Records.empty() && !MemprofGenerateRandomHotness)
+ if (MemProfData.Records.empty() && !MemprofGenerateRandomHotness) {
+ // Need to manually add each record to the builder, which is otherwise done
+ // in addMemProfRecord.
+ for (const auto &[GUID, Record] : Incoming.Records)
+ MemProfSumBuilder.addRecord(Record);
MemProfData.Records = std::move(Incoming.Records);
- else
+ } else {
for (const auto &[GUID, Record] : Incoming.Records)
addMemProfRecord(GUID, Record);
+ }
return true;
}
@@ -612,10 +618,12 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
MemProfSectionStart = OS.tell();
- if (auto E =
- writeMemProf(OS, MemProfData, MemProfVersionRequested,
- MemProfFullSchema, std::move(DataAccessProfileData)))
+ // Get the finalized MemProf summary that was built when adding records.
+ auto MemProfSum = MemProfSumBuilder.getSummary();
+ if (auto E = writeMemProf(
+ OS, MemProfData, MemProfVersionRequested, MemProfFullSchema,
+ std::move(DataAccessProfileData), MemProfSum.get()))
return E;
}
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index 9c723e495e7f9..235b1347e0077 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -32,6 +32,7 @@
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/MemProfReader.h"
+#include "llvm/ProfileData/MemProfSummaryBuilder.h"
#include "llvm/ProfileData/MemProfYAML.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/Debug.h"
@@ -306,8 +307,10 @@ bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
}
void RawMemProfReader::printYAML(raw_ostream &OS) {
+ MemProfSummaryBuilder MemProfSumBuilder;
uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
for (const auto &KV : MemProfData.Records) {
+ MemProfSumBuilder.addRecord(KV.second);
const size_t NumAllocSites = KV.second.AllocSites.size();
if (NumAllocSites > 0) {
NumAllocFunctions++;
@@ -315,6 +318,10 @@ void RawMemProfReader::printYAML(raw_ostream &OS) {
}
}
+ // Print the summary first, as it is printed as YAML comments.
+ auto MemProfSum = MemProfSumBuilder.getSummary();
+ MemProfSum->printSummaryYaml(OS);
+
OS << "MemprofProfile:\n";
OS << " Summary:\n";
OS << " Version: " << MemprofRawVersion << "\n";
diff --git a/llvm/lib/ProfileData/MemProfSummary.cpp b/llvm/lib/ProfileData/MemProfSummary.cpp
new file mode 100644
index 0000000000000..ac1396bfcbfd0
--- /dev/null
+++ b/llvm/lib/ProfileData/MemProfSummary.cpp
@@ -0,0 +1,148 @@
+//=-- MemProfSummary.cpp - MemProf summary support ---------------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains MemProf summary support and related interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/MemProfSummary.h"
+#include "llvm/Support/BLAKE3.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/HashBuilder.h"
+
+using namespace llvm;
+using namespace llvm::memprof;
+
+// Upper bound on lifetime access density (accesses per byte per lifetime sec)
+// for marking an allocation cold.
+cl::opt<float> MemProfLifetimeAccessDensityColdThreshold(
+ "memprof-lifetime-access-density-cold-threshold", cl::init(0.05),
+ cl::Hidden,
+ cl::desc("The threshold the lifetime access density (accesses per byte per "
+ "lifetime sec) must be under to consider an allocation cold"));
+
+// Lower bound on lifetime to mark an allocation cold (in addition to accesses
+// per byte per sec above). This is to avoid pessimizing short lived objects.
+cl::opt<unsigned> MemProfAveLifetimeColdThreshold(
+ "memprof-ave-lifetime-cold-threshold", cl::init(200), cl::Hidden,
+ cl::desc("The average lifetime (s) for an allocation to be considered "
+ "cold"));
+
+// Lower bound on average lifetime accesses density (total life time access
+// density / alloc count) for marking an allocation hot.
+cl::opt<unsigned> MemProfMinAveLifetimeAccessDensityHotThreshold(
+ "memprof-min-ave-lifetime-access-density-hot-threshold", cl::init(1000),
+ cl::Hidden,
+ cl::desc("The minimum TotalLifetimeAccessDensity / AllocCount for an "
+ "allocation to be considered hot"));
+
+cl::opt<bool>
+ MemProfUseHotHints("memprof-use-hot-hints", cl::init(false), cl::Hidden,
+ cl::desc("Enable use of hot hints (only supported for "
+ "unambigously hot allocations)"));
+
+AllocationType llvm::memprof::getAllocType(uint64_t TotalLifetimeAccessDensity,
+ uint64_t AllocCount,
+ uint64_t TotalLifetime) {
+ // The access densities are multiplied by 100 to hold 2 decimal places of
+ // precision, so need to divide by 100.
+ if (((float)TotalLifetimeAccessDensity) / AllocCount / 100 <
+ MemProfLifetimeAccessDensityColdThreshold
+ // Lifetime is expected to be in ms, so convert the threshold to ms.
+ && ((float)TotalLifetime) / AllocCount >=
+ MemProfAveLifetimeColdThreshold * 1000)
+ return AllocationType::Cold;
+
+ // The access densities are multiplied by 100 to hold 2 decimal places of
+ // precision, so need to divide by 100.
+ if (MemProfUseHotHints &&
+ ((float)TotalLifetimeAccessDensity) / AllocCount / 100 >
+ MemProfMinAveLifetimeAccessDensityHotThreshold)
+ return AllocationType::Hot;
+
+ return AllocationType::NotCold;
+}
+
+uint64_t llvm::memprof::computeFullStackId(ArrayRef<Frame> CallStack) {
+ llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
+ HashBuilder;
+ for (auto &F : CallStack)
+ HashBuilder.add(F.Function, F.LineOffset, F.Column);
+ llvm::BLAKE3Result<8> Hash = HashBuilder.final();
+ uint64_t Id;
+ std::memcpy(&Id, Hash.data(), sizeof(Hash));
+ return Id;
+}
+
+void MemProfSummary::printSummaryYaml(raw_ostream &OS) const {
+ // For now emit as YAML comments, since they aren't read on input.
+ OS << "---\n";
+ OS << "# MemProfSummary:\n";
+ OS << "# Total contexts: " << NumContexts << "\n";
+ OS << "# Total cold contexts: " << NumColdContexts << "\n";
+ OS << "# Total hot contexts: " << NumHotContexts << "\n";
+ OS << "# Maximum cold context total size: " << MaxColdTotalSize << "\n";
+ OS << "# Maximum warm context total size: " << MaxWarmTotalSize << "\n";
+ OS << "# Maximum hot context total size: " << MaxHotTotalSize << "\n";
+}
+
+void MemProfSummary::write(ProfOStream &OS) const {
+ // Write the current number of fields first, which helps enable backwards and
+ // forwards compatibility (see comment in header).
+ OS.write32(memprof::MemProfSummary::getNumSummaryFields());
+ auto StartPos = OS.tell();
+ (void)StartPos;
+ OS.write(NumContexts);
+ OS.write(NumColdContexts);
+ OS.write(NumHotContexts);
+ OS.write(MaxColdTotalSize);
+ OS.write(MaxWarmTotalSize);
+ OS.write(MaxHotTotalSize);
+ // Sanity check that the number of fields was kept in sync with actual fields.
+ assert((OS.tell() - StartPos) / 8 == MemProfSummary::getNumSummaryFields());
+}
+
+std::unique_ptr<MemProfSummary>
+MemProfSummary::deserialize(const unsigned char *&Ptr) {
+ auto NumSummaryFields =
+ support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
+ // The initial version of the summary contains 6 fields. To support backwards
+ // compatibility with older profiles, if new summary fields are added (until a
+ // version bump) this code will need to check NumSummaryFields against the
+ // current value of MemProfSummary::getNumSummaryFields(). If NumSummaryFields
+ // is lower then default values will need to be filled in for the newer fields
+ // instead of trying to read them from the profile.
+ //
+ // For now, assert that the profile contains at least as many fields as
+ // expected by the code.
+ assert(NumSummaryFields >= MemProfSummary::getNumSummaryFields());
+
+ auto NumFieldsReadAndSaved =
+ std::min(NumSummaryFields, MemProfSummary::getNumSummaryFields());
+ (void)NumFieldsReadAndSaved;
+ auto StartPos = Ptr;
+ (void)StartPos;
+
+ auto MemProfSum = std::make_unique<MemProfSummary>(
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr),
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr),
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr),
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr),
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr),
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr));
+ // Sanity check that the number of fields specified in summary was kept in
+ // sync with the fields being read and saved.
+ assert((Ptr - StartPos) / 8 == NumFieldsReadAndSaved);
+
+ // Enable forwards compatibility by reading and discarding any additional
+ // fields in the profile's summary.
+ while (NumSummaryFields-- > MemProfSummary::getNumSummaryFields())
+ (void)support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+
+ return MemProfSum;
+}
diff --git a/llvm/lib/ProfileData/MemProfSummaryBuilder.cpp b/llvm/lib/ProfileData/MemProfSummaryBuilder.cpp
new file mode 100644
index 0000000000000..591bcb6b8a3e5
--- /dev/null
+++ b/llvm/lib/ProfileData/MemProfSummaryBuilder.cpp
@@ -0,0 +1,61 @@
+//=-- MemProfSummaryBuilder.cpp - MemProf summary building ---------------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains MemProf summary builder.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/MemProfSummaryBuilder.h"
+
+using namespace llvm;
+using namespace llvm::memprof;
+
+std::unique_ptr<MemProfSummary> MemProfSummaryBuilder::getSummary() {
+ return std::make_unique<MemProfSummary>(NumContexts, NumColdContexts,
+ NumHotContexts, MaxColdTotalSize,
+ MaxWarmTotalSize, MaxHotTotalSize);
+}
+
+void MemProfSummaryBuilder::addRecord(uint64_t CSId,
+ const PortableMemInfoBlock &Info) {
+ auto I = Contexts.insert(CSId);
+ if (!I.second)
+ return;
+ NumContexts++;
+ auto AllocType = getAllocType(Info.getTotalLifetimeAccessDensity(),
+ Info.getAllocCount(), Info.getTotalLifetime());
+ auto TotalSize = Info.getTotalSize();
+ switch (AllocType) {
+ case AllocationType::Cold:
+ NumColdContexts++;
+ if (TotalSize > MaxColdTotalSize)
+ MaxColdTotalSize = TotalSize;
+ break;
+ case AllocationType::NotCold:
+ if (TotalSize > MaxWarmTotalSize)
+ MaxWarmTotalSize = TotalSize;
+ break;
+ case AllocationType::Hot:
+ NumHotContexts++;
+ if (TotalSize > MaxHotTotalSize)
+ MaxHotTotalSize = TotalSize;
+ break;
+ default:
+ assert(false);
+ }
+}
+
+void MemProfSummaryBuilder::addRecord(const IndexedMemProfRecord &Record) {
+ for (auto &Alloc : Record.AllocSites)
+ addRecord(Alloc.CSId, Alloc.Info);
+}
+
+void MemProfSummaryBuilder::addRecord(const MemProfRecord &Record) {
+ for (auto &Alloc : Record.AllocSites)
+ addRecord(computeFullStackId(Alloc.CallStack), Alloc.Info);
+}
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 5982476f3994e..06e4beb98b80a 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -35,6 +35,7 @@
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/ProfileData/MemProfSummary.h"
#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -740,19 +741,6 @@ static uint64_t computeStackId(const memprof::Frame &Frame) {
return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
}
-// Helper to generate a single hash id for a given callstack, used for emitting
-// matching statistics and useful for uniquing such statistics across modules.
-static uint64_t computeFullStackId(ArrayRef<Frame> CallStack) {
- llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
- HashBuilder;
- for (auto &F : CallStack)
- HashBuilder.add(F.Function, F.LineOffset, F.Column);
- llvm::BLAKE3Result<8> Hash = HashBuilder.final();
- uint64_t Id;
- std::memcpy(&Id, Hash.data(), sizeof(Hash));
- return Id;
-}
-
static AllocationType addCallStack(CallStackTrie &AllocTrie,
const AllocationInfo *AllocInfo,
uint64_t FullStackId) {
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index 4a3ddcc38b263..a1241046d7585 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -20,6 +20,17 @@
; RUN: llvm-profdata merge %S/Inputs/memprof_pgo.proftext -o %t.pgoprofdata
; RUN: llvm-profdata merge %S/Inputs/memprof.nocolinfo.memprofraw --profiled-binary %S/Inputs/memprof.nocolinfo.exe -o %t.nocolinfo.memprofdata
+;; Check that the summary can be shown (and is identical) for both the raw and indexed profiles.
+; RUN: llvm-profdata show --memory %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe | FileCheck %s --check-prefixes=SUMMARY
+; RUN: llvm-profdata show --memory %t.memprofdata | FileCheck %s --check-prefixes=SUMMARY
+; SUMMARY: # MemProfSummary:
+; SUMMARY: # Total contexts: 8
+; SUMMARY: # Total cold contexts: 5
+; SUMMARY: # Total hot contexts: 0
+; SUMMARY: # Maximum cold context total size: 10
+; SUMMARY: # Maximum warm context total size: 10
+; SUMMARY: # Maximum hot context total size: 0
+
;; In all below cases we should not get any messages about missing profile data
;; for any functions. Either we are not performing any matching for a particular
;; profile type or we are performing the matching and it should be successful.
diff --git a/llvm/test/Transforms/PGOProfile/memprof_undrift_missing_leaf.ll b/llvm/test/Transforms/PGOProfile/memprof_undrift_missing_leaf.ll
index ec8ee3d7930cf..e512728d30628 100644
--- a/llvm/test/Transforms/PGOProfile/memprof_undrift_missing_leaf.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof_undrift_missing_leaf.ll
@@ -16,6 +16,8 @@ HeapProfileRecords:
MemInfoBlock:
AllocCount: 1
TotalSize: 1
+ TotalLifetime: 1
+ TotalLifetimeAccessDensity: 0
CallSites: []
...
;--- memprof_missing_leaf.ll
diff --git a/llvm/test/tools/llvm-profdata/memprof-yaml.test b/llvm/test/tools/llvm-profdata/memprof-yaml.test
index 0caa1fe5d9fd3..2e52b796ceedc 100644
--- a/llvm/test/tools/llvm-profdata/memprof-yaml.test
+++ b/llvm/test/tools/llvm-profdata/memprof-yaml.test
@@ -20,6 +20,14 @@
;--- memprof-in.yaml
---
+# MemProfSummary:
+# Total contexts: 2
+# Total cold contexts: 0
+# Total hot contexts: 0
+# Maximum cold context total size: 0
+# Maximum warm context total size: 666
+# Maximum hot context total size: 0
+---
HeapProfileRecords:
- GUID: 0xdeadbeef12345678
AllocSites:
@@ -69,6 +77,14 @@ DataAccessProfiles:
...
;--- memprof-in-v3.yaml
---
+# MemProfSummary:
+# Total contexts: 2
+# Total cold contexts: 0
+# Total hot contexts: 0
+# Maximum cold context total size: 0
+# Maximum warm context total size: 666
+# Maximum hot context total size: 0
+---
HeapProfileRecords:
- GUID: 0xdeadbeef12345678
AllocSites:
@@ -98,6 +114,14 @@ HeapProfileRecords:
...
;--- memprof-in-no-dap.yaml
---
+# MemProfSummary:
+# Total contexts: 2
+# Total cold contexts: 0
+# Total hot contexts: 0
+# Maximum cold context total size: 0
+# Maximum warm context total size: 666
+# Maximum hot context total size: 0
+---
HeapProfileRecords:
- GUID: 0xdeadbeef12345678
AllocSites:
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 8660eed6be2bf..70a6888ac663d 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -22,6 +22,7 @@
#include "llvm/ProfileData/InstrProfWriter.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfReader.h"
+#include "llvm/ProfileData/MemProfSummaryBuilder.h"
#include "llvm/ProfileData/MemProfYAML.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/ProfileData/SampleProfReader.h"
@@ -3312,6 +3313,19 @@ static int showMemProfProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
auto Reader = std::move(ReaderOrErr.get());
memprof::AllMemProfData Data = Reader->getAllMemProfData();
+
+ // For v4 and above the summary is serialized in the indexed profile, and can
+ // be accessed from the reader. Earlier versions build the summary below.
+ // The summary is emitted as YAML comments at the start of the output.
+ auto *MemProfSum = Reader->getMemProfSummary();
+ if (MemProfSum) {
+ MemProfSum->printSummaryYaml(OS);
+ } else {
+ memprof::MemProfSummaryBuilder MemProfSumBuilder;
+ for (auto &Pair : Data.HeapProfileRecords)
+ MemProfSumBuilder.addRecord(Pair.Record);
+ MemProfSumBuilder.getSummary()->printSummaryYaml(OS);
+ }
// Construct yaml::Output with the maximum column width of 80 so that each
// Frame fits in one line.
yaml::Output Yout(OS, nullptr, 80);
diff --git a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
index ee2342d164e8c..6eef0b5f91719 100644
--- a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
+++ b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
@@ -23,12 +23,7 @@
using namespace llvm;
using namespace llvm::memprof;
-LLVM_ABI extern cl::opt<float> MemProfLifetimeAccessDensityColdThreshold;
-LLVM_ABI extern cl::opt<unsigned> MemProfAveLifetimeColdThreshold;
-LLVM_ABI extern cl::opt<unsigned>
- MemProfMinAveLifetimeAccessDensityHotThreshold;
-LLVM_ABI extern cl::opt<bool> MemProfUseHotHints;
-LLVM_ABI extern cl::opt<bool> MemProfKeepAllNotColdContexts;
+extern cl::opt<bool> MemProfKeepAllNotColdContexts;
namespace {
@@ -63,70 +58,6 @@ class MemoryProfileInfoTest : public testing::Test {
}
};
-// Test getAllocType helper.
-// Basic checks on the allocation type for values just above and below
-// the thresholds.
-TEST_F(MemoryProfileInfoTest, GetAllocType) {
- const uint64_t AllocCount = 2;
- // To be cold we require that
- // ((float)TotalLifetimeAccessDensity) / AllocCount / 100 <
- // MemProfLifetimeAccessDensityColdThreshold
- // so compute the ColdTotalLifetimeAccessDensityThreshold at the threshold.
- const uint64_t ColdTotalLifetimeAccessDensityThreshold =
- (uint64_t)(MemProfLifetimeAccessDensityColdThreshold * AllocCount * 100);
- // To be cold we require that
- // ((float)TotalLifetime) / AllocCount >=
- // MemProfAveLifetimeColdThreshold * 1000
- // so compute the TotalLifetime right at the threshold.
- const uint64_t ColdTotalLifetimeThreshold =
- MemProfAveLifetimeColdThreshold * AllocCount * 1000;
- // To be hot we require that
- // ((float)TotalLifetimeAccessDensity) / AllocCount / 100 >
- // MemProfMinAveLifetimeAccessDensityHotThreshold
- // so compute the HotTotalLifetimeAccessDensityThreshold at the threshold.
- const uint64_t HotTotalLifetimeAccessDensityThreshold =
- (uint64_t)(MemProfMinAveLifetimeAccessDensityHotThreshold * AllocCount *
- 100);
-
- // Make sure the option for detecting hot allocations is set.
- bool OrigMemProfUseHotHints = MemProfUseHotHints;
- MemProfUseHotHints = true;
-
- // Test Hot
- // More accesses per byte per sec than hot threshold is hot.
- EXPECT_EQ(getAllocType(HotTotalLifetimeAccessDensityThreshold + 1, AllocCount,
- ColdTotalLifetimeThreshold + 1),
- AllocationType::Hot);
-
- // Restore original option value.
- MemProfUseHotHints = OrigMemProfUseHotHints;
-
- // Without MemProfUseHotHints (default) we should treat simply as NotCold.
- EXPECT_EQ(getAllocType(HotTotalLifetimeAccessDensityThreshold + 1, AllocCount,
- ColdTotalLifetimeThreshold + 1),
- AllocationType::NotCold);
-
- // Test Cold
- // Long lived with less accesses per byte per sec than cold threshold is cold.
- EXPECT_EQ(getAllocType(ColdTotalLifetimeAccessDensityThreshold - 1, AllocCount,
- ColdTotalLifetimeThreshold + 1),
- AllocationType::Cold);
-
- // Test NotCold
- // Long lived with more accesses per byte per sec than cold threshold is not cold.
- EXPECT_EQ(getAllocType(ColdTotalLifetimeAccessDensityThreshold + 1, AllocCount,
- ColdTotalLifetimeThreshold + 1),
- AllocationType::NotCold);
- // Short lived with more accesses per byte per sec than cold threshold is not cold.
- EXPECT_EQ(getAllocType(ColdTotalLifetimeAccessDensityThreshold + 1, AllocCount,
- ColdTotalLifetimeThreshold - 1),
- AllocationType::NotCold);
- // Short lived with less accesses per byte per sec than cold threshold is not cold.
- EXPECT_EQ(getAllocType(ColdTotalLifetimeAccessDensityThreshold - 1, AllocCount,
- ColdTotalLifetimeThreshold - 1),
- AllocationType::NotCold);
-}
-
// Test the hasSingleAllocType helper.
TEST_F(MemoryProfileInfoTest, SingleAllocType) {
uint8_t NotCold = (uint8_t)AllocationType::NotCold;
diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 7a2c4aff32c53..ee06c2391e207 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -18,14 +18,23 @@
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/MemProfRadixTree.h"
#include "llvm/ProfileData/MemProfReader.h"
+#include "llvm/ProfileData/MemProfSummary.h"
#include "llvm/Support/raw_ostream.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <initializer_list>
+using namespace llvm;
+
+extern cl::opt<float> MemProfLifetimeAccessDensityColdThreshold;
+extern cl::opt<unsigned> MemProfAveLifetimeColdThreshold;
+extern cl::opt<unsigned> MemProfMinAveLifetimeAccessDensityHotThreshold;
+extern cl::opt<bool> MemProfUseHotHints;
+
namespace llvm {
namespace memprof {
+
namespace {
using ::llvm::DIGlobal;
@@ -859,6 +868,74 @@ TotalLifetimeAccessDensity: 444
...
)YAML");
}
+
+// Test getAllocType helper.
+// Basic checks on the allocation type for values just above and below
+// the thresholds.
+TEST(MemProf, GetAllocType) {
+ const uint64_t AllocCount = 2;
+ // To be cold we require that
+ // ((float)TotalLifetimeAccessDensity) / AllocCount / 100 <
+ // MemProfLifetimeAccessDensityColdThreshold
+ // so compute the ColdTotalLifetimeAccessDensityThreshold at the threshold.
+ const uint64_t ColdTotalLifetimeAccessDensityThreshold =
+ (uint64_t)(MemProfLifetimeAccessDensityColdThreshold * AllocCount * 100);
+ // To be cold we require that
+ // ((float)TotalLifetime) / AllocCount >=
+ // MemProfAveLifetimeColdThreshold * 1000
+ // so compute the TotalLifetime right at the threshold.
+ const uint64_t ColdTotalLifetimeThreshold =
+ MemProfAveLifetimeColdThreshold * AllocCount * 1000;
+ // To be hot we require that
+ // ((float)TotalLifetimeAccessDensity) / AllocCount / 100 >
+ // MemProfMinAveLifetimeAccessDensityHotThreshold
+ // so compute the HotTotalLifetimeAccessDensityThreshold at the threshold.
+ const uint64_t HotTotalLifetimeAccessDensityThreshold =
+ (uint64_t)(MemProfMinAveLifetimeAccessDensityHotThreshold * AllocCount *
+ 100);
+
+ // Make sure the option for detecting hot allocations is set.
+ bool OrigMemProfUseHotHints = MemProfUseHotHints;
+ MemProfUseHotHints = true;
+
+ // Test Hot
+ // More accesses per byte per sec than hot threshold is hot.
+ EXPECT_EQ(getAllocType(HotTotalLifetimeAccessDensityThreshold + 1, AllocCount,
+ ColdTotalLifetimeThreshold + 1),
+ AllocationType::Hot);
+
+ // Restore original option value.
+ MemProfUseHotHints = OrigMemProfUseHotHints;
+
+ // Without MemProfUseHotHints (default) we should treat simply as NotCold.
+ EXPECT_EQ(getAllocType(HotTotalLifetimeAccessDensityThreshold + 1, AllocCount,
+ ColdTotalLifetimeThreshold + 1),
+ AllocationType::NotCold);
+
+ // Test Cold
+ // Long lived with less accesses per byte per sec than cold threshold is cold.
+ EXPECT_EQ(getAllocType(ColdTotalLifetimeAccessDensityThreshold - 1,
+ AllocCount, ColdTotalLifetimeThreshold + 1),
+ AllocationType::Cold);
+
+ // Test NotCold
+ // Long lived with more accesses per byte per sec than cold threshold is not
+ // cold.
+ EXPECT_EQ(getAllocType(ColdTotalLifetimeAccessDensityThreshold + 1,
+ AllocCount, ColdTotalLifetimeThreshold + 1),
+ AllocationType::NotCold);
+ // Short lived with more accesses per byte per sec than cold threshold is not
+ // cold.
+ EXPECT_EQ(getAllocType(ColdTotalLifetimeAccessDensityThreshold + 1,
+ AllocCount, ColdTotalLifetimeThreshold - 1),
+ AllocationType::NotCold);
+ // Short lived with less accesses per byte per sec than cold threshold is not
+ // cold.
+ EXPECT_EQ(getAllocType(ColdTotalLifetimeAccessDensityThreshold - 1,
+ AllocCount, ColdTotalLifetimeThreshold - 1),
+ AllocationType::NotCold);
+}
+
} // namespace
} // namespace memprof
} // namespace llvm
>From 2155eef9c49d693f129a5b1a944ac42175804fdd Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Wed, 28 May 2025 09:52:04 -0700
Subject: [PATCH 2/3] A couple of misc fixes after reviewing the changes
myself:
- Fix a comment typo.
- Combine the version check in the writer with the earlier one for the data
  access profile offset, which also corrects it to a >= v4 check.
- Fix the corresponding (pre-existing) check in the reader to also be >= v4
  instead of == v4.
---
llvm/include/llvm/ProfileData/InstrProfWriter.h | 2 +-
llvm/lib/ProfileData/IndexedMemProfData.cpp | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 8bf1efffc7c8c..ee541c465bd16 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -85,7 +85,7 @@ class InstrProfWriter {
std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
- // MemProf ummary builder to which records are added as MemProf data is added
+ // MemProf summary builder to which records are added as MemProf data is added
// to the writer.
memprof::MemProfSummaryBuilder MemProfSumBuilder;
diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp
index 25786cc6969d6..cbff51e3cbdc6 100644
--- a/llvm/lib/ProfileData/IndexedMemProfData.cpp
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -231,11 +231,11 @@ static Error writeMemProfRadixTreeBased(
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
OS.write(0ULL); // Reserve space for the memprof record payload offset.
OS.write(0ULL); // Reserve space for the memprof record table offset.
- if (Version >= memprof::Version4)
+ if (Version >= memprof::Version4) {
OS.write(0ULL); // Reserve space for the data access profile offset.
- if (Version == memprof::Version4)
MemProfSum->write(OS);
+ }
auto Schema = memprof::getHotColdSchema();
if (MemProfFullSchema)
@@ -402,7 +402,7 @@ Error IndexedMemProfReader::deserializeRadixTreeBased(
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
uint64_t DataAccessProfOffset = 0;
- if (Version == memprof::Version4) {
+ if (Version >= memprof::Version4) {
DataAccessProfOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
MemProfSum = memprof::MemProfSummary::deserialize(Ptr);
>From b294d0cac7cb833b0f28111056f2a2da2e9578a3 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Wed, 28 May 2025 11:47:07 -0700
Subject: [PATCH 3/3] Address comments
---
.../include/llvm/ProfileData/MemProfSummary.h | 4 +--
.../llvm/ProfileData/MemProfSummaryBuilder.h | 4 +++
llvm/lib/ProfileData/MemProfSummary.cpp | 30 +++++++------------
llvm/tools/llvm-profdata/llvm-profdata.cpp | 3 +-
4 files changed, 17 insertions(+), 24 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/MemProfSummary.h b/llvm/include/llvm/ProfileData/MemProfSummary.h
index c65c04dd1f85c..112c2ae9b4b6e 100644
--- a/llvm/include/llvm/ProfileData/MemProfSummary.h
+++ b/llvm/include/llvm/ProfileData/MemProfSummary.h
@@ -37,7 +37,7 @@ class MemProfSummary {
/// reordered, and new summary fields are added after existing summary fields,
/// the MemProf indexed profile version does not need to be bumped to
/// accommodate new summary fields.
- static const unsigned NumSummaryFields = 6;
+ static constexpr unsigned NumSummaryFields = 6;
const uint64_t NumContexts, NumColdContexts, NumHotContexts;
const uint64_t MaxColdTotalSize, MaxWarmTotalSize, MaxHotTotalSize;
@@ -50,7 +50,7 @@ class MemProfSummary {
NumHotContexts(NumHotContexts), MaxColdTotalSize(MaxColdTotalSize),
MaxWarmTotalSize(MaxWarmTotalSize), MaxHotTotalSize(MaxHotTotalSize) {}
- static unsigned getNumSummaryFields() { return NumSummaryFields; }
+ static constexpr unsigned getNumSummaryFields() { return NumSummaryFields; }
uint64_t getNumContexts() const { return NumContexts; }
uint64_t getNumColdContexts() const { return NumColdContexts; }
uint64_t getNumHotContexts() const { return NumHotContexts; }
diff --git a/llvm/include/llvm/ProfileData/MemProfSummaryBuilder.h b/llvm/include/llvm/ProfileData/MemProfSummaryBuilder.h
index 61cc46cfbc214..3c615930d2210 100644
--- a/llvm/include/llvm/ProfileData/MemProfSummaryBuilder.h
+++ b/llvm/include/llvm/ProfileData/MemProfSummaryBuilder.h
@@ -21,7 +21,11 @@ namespace memprof {
class MemProfSummaryBuilder {
private:
+ // The set of full context IDs that we've recorded so far. This is needed to
+ // dedup the MIBs, which are duplicated between functions containing inline
+ // instances of the same allocations.
DenseSet<uint64_t> Contexts;
+
void addRecord(uint64_t, const PortableMemInfoBlock &);
protected:
diff --git a/llvm/lib/ProfileData/MemProfSummary.cpp b/llvm/lib/ProfileData/MemProfSummary.cpp
index ac1396bfcbfd0..116eb2f805afa 100644
--- a/llvm/lib/ProfileData/MemProfSummary.cpp
+++ b/llvm/lib/ProfileData/MemProfSummary.cpp
@@ -122,27 +122,17 @@ MemProfSummary::deserialize(const unsigned char *&Ptr) {
// expected by the code.
assert(NumSummaryFields >= MemProfSummary::getNumSummaryFields());
- auto NumFieldsReadAndSaved =
- std::min(NumSummaryFields, MemProfSummary::getNumSummaryFields());
- (void)NumFieldsReadAndSaved;
- auto StartPos = Ptr;
- (void)StartPos;
-
auto MemProfSum = std::make_unique<MemProfSummary>(
- support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr),
- support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr),
- support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr),
- support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr),
- support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr),
- support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr));
- // Sanity check that the number of fields specified in summary was kept in
- // sync with the fields being read and saved.
- assert((Ptr - StartPos) / 8 == NumFieldsReadAndSaved);
-
- // Enable forwards compatibility by reading and discarding any additional
- // fields in the profile's summary.
- while (NumSummaryFields-- > MemProfSummary::getNumSummaryFields())
- (void)support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ support::endian::read<uint64_t, llvm::endianness::little>(Ptr),
+ support::endian::read<uint64_t, llvm::endianness::little>(Ptr + 8),
+ support::endian::read<uint64_t, llvm::endianness::little>(Ptr + 16),
+ support::endian::read<uint64_t, llvm::endianness::little>(Ptr + 24),
+ support::endian::read<uint64_t, llvm::endianness::little>(Ptr + 32),
+ support::endian::read<uint64_t, llvm::endianness::little>(Ptr + 40));
+
+ // Enable forwards compatibility by skipping past any additional fields in the
+ // profile's summary.
+ Ptr += NumSummaryFields * sizeof(uint64_t);
return MemProfSum;
}
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 70a6888ac663d..a7cbd4bfc8387 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -3317,8 +3317,7 @@ static int showMemProfProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
// For v4 and above the summary is serialized in the indexed profile, and can
// be accessed from the reader. Earlier versions build the summary below.
// The summary is emitted as YAML comments at the start of the output.
- auto *MemProfSum = Reader->getMemProfSummary();
- if (MemProfSum) {
+ if (auto *MemProfSum = Reader->getMemProfSummary()) {
MemProfSum->printSummaryYaml(OS);
} else {
memprof::MemProfSummaryBuilder MemProfSumBuilder;
More information about the llvm-commits mailing list