[llvm-branch-commits] [llvm] [StaticDataLayout][PGO] Implement reader and writer changes for data access profiles (PR #139997)
Mingming Liu via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed May 14 21:00:12 PDT 2025
https://github.com/mingmingl-llvm created https://github.com/llvm/llvm-project/pull/139997
None
>From 75878647c2c36cca00e9d003dc84bf4597e19187 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 13 May 2025 22:54:59 -0700
Subject: [PATCH] [StaticDataLayout][PGO] Implement reader and writer changes for
data access profiles
---
.../include/llvm/ProfileData/DataAccessProf.h | 12 +++-
.../llvm/ProfileData/IndexedMemProfData.h | 12 +++-
.../llvm/ProfileData/InstrProfReader.h | 6 +-
.../llvm/ProfileData/InstrProfWriter.h | 6 ++
llvm/include/llvm/ProfileData/MemProfReader.h | 12 ++++
llvm/include/llvm/ProfileData/MemProfYAML.h | 65 +++++++++++++++++++
llvm/lib/ProfileData/DataAccessProf.cpp | 6 +-
llvm/lib/ProfileData/IndexedMemProfData.cpp | 61 +++++++++++++----
llvm/lib/ProfileData/InstrProfReader.cpp | 14 ++++
llvm/lib/ProfileData/InstrProfWriter.cpp | 20 ++++--
llvm/lib/ProfileData/MemProfReader.cpp | 34 ++++++++++
.../tools/llvm-profdata/memprof-yaml.test | 11 ++++
llvm/tools/llvm-profdata/llvm-profdata.cpp | 5 ++
.../ProfileData/DataAccessProfTest.cpp | 11 ++--
14 files changed, 244 insertions(+), 31 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/DataAccessProf.h b/llvm/include/llvm/ProfileData/DataAccessProf.h
index e8504102238d1..f5f6abf0a2817 100644
--- a/llvm/include/llvm/ProfileData/DataAccessProf.h
+++ b/llvm/include/llvm/ProfileData/DataAccessProf.h
@@ -41,6 +41,8 @@ namespace data_access_prof {
struct SourceLocation {
SourceLocation(StringRef FileNameRef, uint32_t Line)
: FileName(FileNameRef.str()), Line(Line) {}
+
+  // Default-construct an empty location. Line is zeroed explicitly so that a
+  // location filled in field by field (where the line may be left untouched)
+  // never exposes an indeterminate value.
+  SourceLocation() : Line(0) {}
/// The filename where the data is located.
std::string FileName;
/// The line number in the source code.
@@ -53,6 +55,8 @@ namespace internal {
// which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData`
// to represent data locations internally.
struct SourceLocationRef {
+ SourceLocationRef(StringRef FileNameRef, uint32_t Line)
+ : FileName(FileNameRef), Line(Line) {}
// The filename where the data is located.
StringRef FileName;
// The line number in the source code.
@@ -100,8 +104,9 @@ using SymbolHandle = std::variant<std::string, uint64_t>;
/// The data access profiles for a symbol.
struct DataAccessProfRecord {
public:
- DataAccessProfRecord(SymbolHandleRef SymHandleRef,
- ArrayRef<internal::SourceLocationRef> LocRefs) {
+ DataAccessProfRecord(SymbolHandleRef SymHandleRef, uint64_t AccessCount,
+ ArrayRef<internal::SourceLocationRef> LocRefs)
+ : AccessCount(AccessCount) {
if (std::holds_alternative<StringRef>(SymHandleRef)) {
SymHandle = std::get<StringRef>(SymHandleRef).str();
} else
@@ -110,8 +115,9 @@ struct DataAccessProfRecord {
for (auto Loc : LocRefs)
Locations.push_back(SourceLocation(Loc.FileName, Loc.Line));
}
+  // Default-construct an empty record. AccessCount is zeroed explicitly so that
+  // a record filled in field by field (where the count may be left untouched)
+  // never carries an indeterminate value into later reads.
+  DataAccessProfRecord() : AccessCount(0) {}
SymbolHandle SymHandle;
-
+ uint64_t AccessCount;
// The locations of data in the source code. Optional.
SmallVector<SourceLocation> Locations;
};
diff --git a/llvm/include/llvm/ProfileData/IndexedMemProfData.h b/llvm/include/llvm/ProfileData/IndexedMemProfData.h
index 3c6c329d1c49d..66fa38472059b 100644
--- a/llvm/include/llvm/ProfileData/IndexedMemProfData.h
+++ b/llvm/include/llvm/ProfileData/IndexedMemProfData.h
@@ -10,14 +10,20 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
+#include <functional>
+#include <optional>
+
namespace llvm {
// Write the MemProf data to OS.
-Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
- memprof::IndexedVersion MemProfVersionRequested,
- bool MemProfFullSchema);
+Error writeMemProf(
+ ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+ memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
+ std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
+ DataAccessProfileData);
} // namespace llvm
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index c250a9ede39bc..210df6be46f04 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/Object/BuildID.h"
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfCorrelator.h"
#include "llvm/ProfileData/MemProf.h"
@@ -704,9 +705,12 @@ class IndexedMemProfReader {
// The number of elements in the radix tree array.
unsigned RadixTreeSize = 0;
+ std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
+
Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
Error deserializeRadixTreeBased(const unsigned char *Start,
- const unsigned char *Ptr);
+ const unsigned char *Ptr,
+ memprof::IndexedVersion Version);
public:
IndexedMemProfReader() = default;
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 67d85daa81623..cf1cec25c3cac 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Object/BuildID.h"
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Error.h"
@@ -81,6 +82,8 @@ class InstrProfWriter {
// Whether to generate random memprof hotness for testing.
bool MemprofGenerateRandomHotness;
+ std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
+
public:
// For memprof testing, random hotness can be assigned to the contexts if
// MemprofGenerateRandomHotness is enabled. The random seed can be either
@@ -122,6 +125,9 @@ class InstrProfWriter {
// Add a binary id to the binary ids list.
void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);
+ void addDataAccessProfData(
+ std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfile);
+
/// Merge existing function counts from the given writer.
void mergeRecordsFromWriter(InstrProfWriter &&IPW,
function_ref<void(Error)> Warn);
diff --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 29d9e57cae3e3..3ff5cfe76b6cb 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -50,6 +50,11 @@ class MemProfReader {
// MemProfReader no longer owns the MemProf profile.
IndexedMemProfData takeMemProfData() { return std::move(MemProfData); }
+ // Hands the data access profile data over to the caller by moving the
+ // reader's owning pointer out. The result is whatever the reader currently
+ // holds, which is null when nothing has been set.
+ std::unique_ptr<data_access_prof::DataAccessProfData>
+ takeDataAccessProfData() {
+ return std::move(DataAccessProfileData);
+ }
+
virtual Error
readNextRecord(GuidMemProfRecordPair &GuidRecord,
std::function<const Frame(const FrameId)> Callback = nullptr) {
@@ -86,6 +91,11 @@ class MemProfReader {
MemProfReader(IndexedMemProfData &&MemProfData)
: MemProfData(std::move(MemProfData)) {}
+ // Takes ownership of Data and stores it as the reader's data access profile,
+ // replacing any previously held instance.
+ void setDataAccessProfileData(
+ std::unique_ptr<data_access_prof::DataAccessProfData> Data) {
+ DataAccessProfileData = std::move(Data);
+ }
+
protected:
// A helper method to extract the frame from the IdToFrame map.
const Frame &idToFrame(const FrameId Id) const {
@@ -97,6 +107,8 @@ class MemProfReader {
IndexedMemProfData MemProfData;
// An iterator to the internal function profile data structure.
llvm::MapVector<GlobalValue::GUID, IndexedMemProfRecord>::iterator Iter;
+
+ std::unique_ptr<data_access_prof::DataAccessProfData> DataAccessProfileData;
};
// Map from id (recorded from sanitizer stack depot) to virtual addresses for
diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h
index 08dee253f615a..8db73f4bb5a1b 100644
--- a/llvm/include/llvm/ProfileData/MemProfYAML.h
+++ b/llvm/include/llvm/ProfileData/MemProfYAML.h
@@ -2,6 +2,7 @@
#define LLVM_PROFILEDATA_MEMPROFYAML_H_
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/YAMLTraits.h"
@@ -12,6 +13,9 @@ namespace memprof {
// serialized and deserialized in YAML.
LLVM_YAML_STRONG_TYPEDEF(uint64_t, GUIDHex64)
+LLVM_YAML_STRONG_TYPEDEF(uint64_t, SymbolContentHash)
+LLVM_YAML_STRONG_TYPEDEF(std::string, OwnedSymbolName)
+
// Helper struct for AllMemProfData. In YAML, we treat the GUID and the fields
// within MemProfRecord at the same level as if the GUID were part of
// MemProfRecord.
@@ -20,9 +24,25 @@ struct GUIDMemProfRecordPair {
MemProfRecord Record;
};
+// YAML-facing mirror of data_access_prof::DataAccessProfData. Every string is
+// owned by the struct itself. This keeps the code simple, on the assumption
+// that real-world users mostly do look-ups and that regression-test inputs are
+// small, so string efficiency is not a priority here.
+struct YamlDataAccessProfData {
+  std::vector<data_access_prof::DataAccessProfRecord> Records;
+  std::vector<uint64_t> KnownColdHashes;
+  std::vector<std::string> KnownColdSymbols;
+
+  // True when there are no records, no cold hashes and no cold symbols.
+  bool isEmpty() const {
+    const bool HasAnyEntry = !Records.empty() || !KnownColdHashes.empty() ||
+                             !KnownColdSymbols.empty();
+    return !HasAnyEntry;
+  }
+};
+
// The top-level data structure, only used with YAML for now.
struct AllMemProfData {
std::vector<GUIDMemProfRecordPair> HeapProfileRecords;
+ YamlDataAccessProfData YamlifiedDataAccessProfiles;
};
} // namespace memprof
@@ -206,9 +226,50 @@ template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
}
};
+// YAML mapping for a source location. Both keys are optional, and they are
+// visited in the order FileName, then Line.
+template <> struct MappingTraits<data_access_prof::SourceLocation> {
+  static void mapping(IO &YamlIO, data_access_prof::SourceLocation &SrcLoc) {
+    YamlIO.mapOptional("FileName", SrcLoc.FileName);
+    YamlIO.mapOptional("Line", SrcLoc.Line);
+  }
+};
+
+// YAML mapping for a single data access profile record. A record is keyed by
+// either a symbol name ("Symbol") or a hash ("Hash"), matching the two
+// alternatives held by SymHandle.
+template <> struct MappingTraits<data_access_prof::DataAccessProfRecord> {
+  static void mapping(IO &Io, data_access_prof::DataAccessProfRecord &Rec) {
+    if (Io.outputting()) {
+      // Emit only the key that corresponds to the active alternative.
+      if (std::holds_alternative<std::string>(Rec.SymHandle)) {
+        Io.mapOptional("Symbol", std::get<std::string>(Rec.SymHandle));
+      } else {
+        Io.mapOptional("Hash", std::get<uint64_t>(Rec.SymHandle));
+      }
+    } else {
+      // Read both keys; a non-empty symbol name takes precedence over the hash.
+      std::string SymName;
+      uint64_t Hash = 0;
+      Io.mapOptional("Symbol", SymName);
+      Io.mapOptional("Hash", Hash);
+      if (!SymName.empty()) {
+        Rec.SymHandle = SymName;
+      } else {
+        Rec.SymHandle = Hash;
+      }
+    }
+
+    // Map the access count so it survives a YAML round trip instead of being
+    // dropped on output and left unset on input. The explicit default of 0
+    // gives the field a defined value when the key is absent, so documents
+    // written without the key remain valid.
+    Io.mapOptional("AccessCount", Rec.AccessCount, static_cast<uint64_t>(0));
+    Io.mapOptional("Locations", Rec.Locations);
+  }
+};
+
+// YAML mapping for the data access profile section. All three keys are
+// optional and are visited in the order SampledRecords, KnownColdSymbols,
+// KnownColdHashes.
+template <> struct MappingTraits<memprof::YamlDataAccessProfData> {
+  static void mapping(IO &YamlIO, memprof::YamlDataAccessProfData &Profiles) {
+    YamlIO.mapOptional("SampledRecords", Profiles.Records);
+    YamlIO.mapOptional("KnownColdSymbols", Profiles.KnownColdSymbols);
+    YamlIO.mapOptional("KnownColdHashes", Profiles.KnownColdHashes);
+  }
+};
+
template <> struct MappingTraits<memprof::AllMemProfData> {
static void mapping(IO &Io, memprof::AllMemProfData &Data) {
Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
+
+ // The data access profile section is mapped as optional, unlike the
+ // required heap profile records above.
+ Io.mapOptional("DataAccessProfiles", Data.YamlifiedDataAccessProfiles);
}
};
@@ -234,5 +295,9 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids
+LLVM_YAML_IS_SEQUENCE_VECTOR(data_access_prof::DataAccessProfRecord)
+LLVM_YAML_IS_SEQUENCE_VECTOR(data_access_prof::SourceLocation)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::SymbolContentHash)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::OwnedSymbolName)
#endif // LLVM_PROFILEDATA_MEMPROFYAML_H_
diff --git a/llvm/lib/ProfileData/DataAccessProf.cpp b/llvm/lib/ProfileData/DataAccessProf.cpp
index c5d0099977cfa..61a73fab7269f 100644
--- a/llvm/lib/ProfileData/DataAccessProf.cpp
+++ b/llvm/lib/ProfileData/DataAccessProf.cpp
@@ -48,7 +48,8 @@ DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const {
auto It = Records.find(Key);
if (It != Records.end()) {
- return DataAccessProfRecord(Key, It->second.Locations);
+ return DataAccessProfRecord(Key, It->second.AccessCount,
+ It->second.Locations);
}
return std::nullopt;
@@ -111,7 +112,8 @@ Error DataAccessProfData::addKnownSymbolWithoutSamples(
auto CanonicalName = getCanonicalName(std::get<StringRef>(SymbolID));
if (!CanonicalName)
return CanonicalName.takeError();
- KnownColdSymbols.insert(*CanonicalName);
+ KnownColdSymbols.insert(
+ saveStringToMap(StrToIndexMap, Saver, *CanonicalName).first);
return Error::success();
}
diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp
index 3d20f7a7a5778..cc1b03101c880 100644
--- a/llvm/lib/ProfileData/IndexedMemProfData.cpp
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/MemProf.h"
@@ -216,7 +217,9 @@ static Error writeMemProfV2(ProfOStream &OS,
static Error writeMemProfRadixTreeBased(
ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
- memprof::IndexedVersion Version, bool MemProfFullSchema) {
+ memprof::IndexedVersion Version, bool MemProfFullSchema,
+ std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
+ DataAccessProfileData) {
assert((Version == memprof::Version3 || Version == memprof::Version4) &&
"Unsupported version for radix tree format");
@@ -225,6 +228,8 @@ static Error writeMemProfRadixTreeBased(
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
OS.write(0ULL); // Reserve space for the memprof record payload offset.
OS.write(0ULL); // Reserve space for the memprof record table offset.
+ if (Version == memprof::Version4)
+ OS.write(0ULL); // Reserve space for the data access profile offset.
auto Schema = memprof::getHotColdSchema();
if (MemProfFullSchema)
@@ -251,17 +256,26 @@ static Error writeMemProfRadixTreeBased(
uint64_t RecordTableOffset = writeMemProfRecords(
OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes);
+ uint64_t DataAccessProfOffset = 0;
+ if (DataAccessProfileData.has_value()) {
+ DataAccessProfOffset = OS.tell();
+ if (Error E = (*DataAccessProfileData).get().serialize(OS))
+ return E;
+ }
+
// Verify that the computation for the number of elements in the call stack
// array works.
assert(CallStackPayloadOffset +
NumElements * sizeof(memprof::LinearFrameId) ==
RecordPayloadOffset);
- uint64_t Header[] = {
+ SmallVector<uint64_t, 4> Header = {
CallStackPayloadOffset,
RecordPayloadOffset,
RecordTableOffset,
};
+ if (Version == memprof::Version4)
+ Header.push_back(DataAccessProfOffset);
OS.patch({{HeaderUpdatePos, Header}});
return Error::success();
@@ -272,28 +286,33 @@ static Error writeMemProfV3(ProfOStream &OS,
memprof::IndexedMemProfData &MemProfData,
bool MemProfFullSchema) {
return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3,
- MemProfFullSchema);
+ MemProfFullSchema, std::nullopt);
}
// Write out MemProf Version4
-static Error writeMemProfV4(ProfOStream &OS,
- memprof::IndexedMemProfData &MemProfData,
- bool MemProfFullSchema) {
+static Error writeMemProfV4(
+ ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+ bool MemProfFullSchema,
+ std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
+ DataAccessProfileData) {
return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version4,
- MemProfFullSchema);
+ MemProfFullSchema, DataAccessProfileData);
}
// Write out the MemProf data in a requested version.
-Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
- memprof::IndexedVersion MemProfVersionRequested,
- bool MemProfFullSchema) {
+Error writeMemProf(
+ ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+ memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
+ std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
+ DataAccessProfileData) {
switch (MemProfVersionRequested) {
case memprof::Version2:
return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
case memprof::Version3:
return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
case memprof::Version4:
- return writeMemProfV4(OS, MemProfData, MemProfFullSchema);
+ return writeMemProfV4(OS, MemProfData, MemProfFullSchema,
+ DataAccessProfileData);
}
return make_error<InstrProfError>(
@@ -357,7 +376,10 @@ Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,
}
Error IndexedMemProfReader::deserializeRadixTreeBased(
- const unsigned char *Start, const unsigned char *Ptr) {
+ const unsigned char *Start, const unsigned char *Ptr,
+ memprof::IndexedVersion Version) {
+ assert((Version == memprof::Version3 || Version == memprof::Version4) &&
+ "Unsupported version for radix tree format");
// The offset in the stream right before invoking
// CallStackTableGenerator.Emit.
const uint64_t CallStackPayloadOffset =
@@ -369,6 +391,11 @@ Error IndexedMemProfReader::deserializeRadixTreeBased(
const uint64_t RecordTableOffset =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ uint64_t DataAccessProfOffset = 0;
+ if (Version == memprof::Version4)
+ DataAccessProfOffset =
+ support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+
// Read the schema.
auto SchemaOr = memprof::readMemProfSchema(Ptr);
if (!SchemaOr)
@@ -390,6 +417,14 @@ Error IndexedMemProfReader::deserializeRadixTreeBased(
/*Payload=*/Start + RecordPayloadOffset,
/*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
+ if (DataAccessProfOffset > RecordTableOffset) {
+ DataAccessProfileData =
+ std::make_unique<data_access_prof::DataAccessProfData>();
+ const unsigned char *DAPPtr = Start + DataAccessProfOffset;
+ if (Error E = DataAccessProfileData->deserialize(DAPPtr))
+ return E;
+ }
+
return Error::success();
}
@@ -423,7 +458,7 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
case memprof::Version3:
case memprof::Version4:
// V3 and V4 share the same high-level structure (radix tree, linear IDs).
- if (Error E = deserializeRadixTreeBased(Start, Ptr))
+ if (Error E = deserializeRadixTreeBased(Start, Ptr, Version))
return E;
break;
}
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index e6c83430cd8e9..696a2518e148e 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1551,6 +1551,20 @@ memprof::AllMemProfData IndexedMemProfReader::getAllMemProfData() const {
Pair.Record = std::move(*Record);
AllMemProfData.HeapProfileRecords.push_back(std::move(Pair));
}
+ // Populate the data access profiles.
+ if (DataAccessProfileData != nullptr) {
+ for (const auto &[SymHandleRef, RecordRef] :
+ DataAccessProfileData->getRecords())
+ AllMemProfData.YamlifiedDataAccessProfiles.Records.push_back(
+ data_access_prof::DataAccessProfRecord(
+ SymHandleRef, RecordRef.AccessCount, RecordRef.Locations));
+ for (StringRef ColdSymbol : DataAccessProfileData->getKnownColdSymbols())
+ AllMemProfData.YamlifiedDataAccessProfiles.KnownColdSymbols.push_back(
+ ColdSymbol.str());
+ for (uint64_t Hash : DataAccessProfileData->getKnownColdHashes())
+ AllMemProfData.YamlifiedDataAccessProfiles.KnownColdHashes.push_back(
+ Hash);
+ }
return AllMemProfData;
}
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 2759346935b14..7217826d550a7 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/IndexedMemProfData.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
@@ -29,6 +30,7 @@
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <ctime>
+#include <functional>
#include <memory>
#include <string>
#include <tuple>
@@ -152,9 +154,7 @@ void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) {
InfoObj->ValueProfDataEndianness = Endianness;
}
-void InstrProfWriter::setOutputSparse(bool Sparse) {
- this->Sparse = Sparse;
-}
+void InstrProfWriter::setOutputSparse(bool Sparse) { this->Sparse = Sparse; }
void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight,
function_ref<void(Error)> Warn) {
@@ -329,6 +329,12 @@ void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
llvm::append_range(BinaryIds, BIs);
}
+// Takes ownership of the given data access profile data and stores it in the
+// writer, replacing whatever the writer previously held.
+void InstrProfWriter::addDataAccessProfData(
+ std::unique_ptr<data_access_prof::DataAccessProfData>
+ DataAccessProfDataIn) {
+ DataAccessProfileData = std::move(DataAccessProfDataIn);
+}
+
void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
assert(!Trace.FunctionNameRefs.empty());
@@ -614,8 +620,14 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
uint64_t MemProfSectionStart = 0;
if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
MemProfSectionStart = OS.tell();
+    // Wrap the writer's data access profile (if one was added) in an optional
+    // reference; the optional stays empty when the writer holds none.
+    std::optional<std::reference_wrapper<data_access_prof::DataAccessProfData>>
+        DAP;
+    if (DataAccessProfileData)
+      DAP = std::ref(*DataAccessProfileData);
if (auto E = writeMemProf(OS, MemProfData, MemProfVersionRequested,
- MemProfFullSchema))
+ MemProfFullSchema, DAP))
+
return E;
}
diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index e0f280b9eb2f6..1562b7d5bb01b 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
@@ -823,6 +824,39 @@ void YAMLMemProfReader::parse(StringRef YAMLData) {
MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord));
}
+
+  // Nothing more to do when the document carries no data access profiles.
+  if (Doc.YamlifiedDataAccessProfiles.isEmpty())
+    return;
+
+  // Converts an owning SymbolHandle into a non-owning SymbolHandleRef. The
+  // handle is taken by const reference so that the returned StringRef points
+  // into the caller's string; taking it by value would return a view of a
+  // parameter copy whose lifetime may already have ended when the view is used.
+  auto ToSymHandleRef = [](const data_access_prof::SymbolHandle &Handle)
+      -> data_access_prof::SymbolHandleRef {
+    if (std::holds_alternative<std::string>(Handle))
+      return StringRef(std::get<std::string>(Handle));
+    return std::get<uint64_t>(Handle);
+  };
+
+  // Rebuild the data access profile from the parsed document. Any error
+  // returned while populating it is reported as a fatal internal error.
+  auto DataAccessProfileData =
+      std::make_unique<data_access_prof::DataAccessProfData>();
+  for (const auto &Record : Doc.YamlifiedDataAccessProfiles.Records) {
+    if (Error E = DataAccessProfileData->setDataAccessProfile(
+            ToSymHandleRef(Record.SymHandle), Record.AccessCount,
+            Record.Locations)) {
+      reportFatalInternalError(std::move(E));
+    }
+  }
+  for (const uint64_t Hash : Doc.YamlifiedDataAccessProfiles.KnownColdHashes) {
+    if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Hash)) {
+      reportFatalInternalError(std::move(E));
+    }
+  }
+  for (const std::string &Sym :
+       Doc.YamlifiedDataAccessProfiles.KnownColdSymbols) {
+    if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Sym)) {
+      reportFatalInternalError(std::move(E));
+    }
+  }
+
+  setDataAccessProfileData(std::move(DataAccessProfileData));
}
} // namespace memprof
} // namespace llvm
diff --git a/llvm/test/tools/llvm-profdata/memprof-yaml.test b/llvm/test/tools/llvm-profdata/memprof-yaml.test
index 9766cc50f37d7..5e0c7fb3ea1d8 100644
--- a/llvm/test/tools/llvm-profdata/memprof-yaml.test
+++ b/llvm/test/tools/llvm-profdata/memprof-yaml.test
@@ -35,4 +35,15 @@ HeapProfileRecords:
- { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true }
- { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false }
CalleeGuids: [ 0x300 ]
+DataAccessProfiles:
+ SampledRecords:
+ - Symbol: abcde
+ - Hash: 101010
+ Locations:
+ - FileName: file
+ Line: 233
+ KnownColdSymbols:
+ - foo
+ - bar
+ KnownColdHashes: [ 999, 1001 ]
...
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 885e06df6c390..87ddca230b304 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -16,6 +16,7 @@
#include "llvm/Debuginfod/HTTPClient.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Object/Binary.h"
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProfCorrelator.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/InstrProfWriter.h"
@@ -756,6 +757,8 @@ loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
auto MemProfData = Reader->takeMemProfData();
+ auto DataAccessProfData = Reader->takeDataAccessProfData();
+
// Check for the empty input in case the YAML file is invalid.
if (MemProfData.Records.empty()) {
WC->Errors.emplace_back(
@@ -764,6 +767,7 @@ loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
}
WC->Writer.addMemProfData(std::move(MemProfData), MemProfError);
+ WC->Writer.addDataAccessProfData(std::move(DataAccessProfData));
return;
}
@@ -3308,6 +3312,7 @@ static int showMemProfProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
auto Reader = std::move(ReaderOrErr.get());
memprof::AllMemProfData Data = Reader->getAllMemProfData();
+
// Construct yaml::Output with the maximum column width of 80 so that each
// Frame fits in one line.
yaml::Output Yout(OS, nullptr, 80);
diff --git a/llvm/unittests/ProfileData/DataAccessProfTest.cpp b/llvm/unittests/ProfileData/DataAccessProfTest.cpp
index 127230d4805e7..084a8e96cdafe 100644
--- a/llvm/unittests/ProfileData/DataAccessProfTest.cpp
+++ b/llvm/unittests/ProfileData/DataAccessProfTest.cpp
@@ -78,7 +78,7 @@ TEST(MemProf, DataAccessProfile) {
// Test that symbol names and file names are stored in the input order.
EXPECT_THAT(
llvm::to_vector(llvm::make_first_range(Data.getStrToIndexMapRef())),
- ElementsAre("foo", "bar.__uniq.321", "file2", "file1"));
+ ElementsAre("foo", "sym2", "bar.__uniq.321", "file2", "sym1", "file1"));
EXPECT_THAT(Data.getKnownColdSymbols(), ElementsAre("sym2", "sym1"));
EXPECT_THAT(Data.getKnownColdHashes(), ElementsAre(789, 678));
@@ -134,9 +134,10 @@ TEST(MemProf, DataAccessProfile) {
testing::IsEmpty());
EXPECT_FALSE(deserializedData.deserialize(p));
- EXPECT_THAT(llvm::to_vector(llvm::make_first_range(
- deserializedData.getStrToIndexMapRef())),
- ElementsAre("foo", "bar.__uniq.321", "file2", "file1"));
+ EXPECT_THAT(
+ llvm::to_vector(
+ llvm::make_first_range(deserializedData.getStrToIndexMapRef())),
+ ElementsAre("foo", "sym2", "bar.__uniq.321", "file2", "sym1", "file1"));
EXPECT_THAT(deserializedData.getKnownColdSymbols(),
ElementsAre("sym2", "sym1"));
EXPECT_THAT(deserializedData.getKnownColdHashes(), ElementsAre(789, 678));
@@ -158,7 +159,7 @@ TEST(MemProf, DataAccessProfile) {
Field(&DataAccessProfRecordRef::AccessCount, 100),
Field(&DataAccessProfRecordRef::IsStringLiteral, false),
Field(&DataAccessProfRecordRef::Locations, testing::IsEmpty())),
- AllOf(Field(&DataAccessProfRecordRef::SymbolID, 1),
+ AllOf(Field(&DataAccessProfRecordRef::SymbolID, 2),
Field(&DataAccessProfRecordRef::AccessCount, 123),
Field(&DataAccessProfRecordRef::IsStringLiteral, false),
Field(&DataAccessProfRecordRef::Locations,
More information about the llvm-branch-commits
mailing list