[llvm] 6bbaef1 - Re-apply "[StaticDataLayout][PGO]Implement reader and writer change for data access profiles" (#141275)

via llvm-commits llvm-commits at lists.llvm.org
Sat May 24 10:33:53 PDT 2025


Author: Mingming Liu
Date: 2025-05-24T10:33:50-07:00
New Revision: 6bbaef1a95989afadc2c298018ac531591e4f159

URL: https://github.com/llvm/llvm-project/commit/6bbaef1a95989afadc2c298018ac531591e4f159
DIFF: https://github.com/llvm/llvm-project/commit/6bbaef1a95989afadc2c298018ac531591e4f159.diff

LOG: Re-apply "[StaticDataLayout][PGO]Implement reader and writer change for data access profiles" (#141275)

Re-apply https://github.com/llvm/llvm-project/pull/139997 after fixing the use-of-uninitialized-memory error
(https://lab.llvm.org/buildbot/#/builders/94/builds/7373).

Tested: The error is reproduced with
https://github.com/llvm/llvm-zorg/blob/main/zorg/buildbot/builders/sanitizers/buildbot_bootstrap_msan.sh
without the fix, and the test passes with the fix.


**Original commit message:**

https://github.com/llvm/llvm-project/pull/138170 introduces classes to
operate on data access profiles. This change supports the read and write
of `DataAccessProfData` in the indexed format of MemProf (v4) as well as in
the text (YAML) format.

For indexed format:
* InstrProfWriter owns (by `std::unique_ptr<DataAccessProfData>`) the
data access profiles, and gives a non-owned copy when it calls
`writeMemProf`.
* MemProf v4 header has a new `uint64_t` to record the byte offset of
data access profiles. This `uint64_t` field is zero if data access
profile is not set (nullptr).
* MemProfReader reads the offset from v4 header and de-serializes
in-memory bytes into class `DataAccessProfData`.

For textual format:
* MemProfYAML.h adds the mapping for the DAP class, and makes DAP optional
for both read and write.

099a0fa (by @snehasish) introduces v4 which contains CalleeGuids in
CallSiteInfo, and this change changes the v4 format in place with data
access profiles. The current plan is to bump the version and enable v4
profiles with both features, assuming waiting for this change won't
delay the callsite change too long.

---------

Co-authored-by: Kazu Hirata <kazu at google.com>

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/DataAccessProf.h
    llvm/include/llvm/ProfileData/IndexedMemProfData.h
    llvm/include/llvm/ProfileData/InstrProfReader.h
    llvm/include/llvm/ProfileData/InstrProfWriter.h
    llvm/include/llvm/ProfileData/MemProfReader.h
    llvm/include/llvm/ProfileData/MemProfYAML.h
    llvm/lib/ProfileData/DataAccessProf.cpp
    llvm/lib/ProfileData/IndexedMemProfData.cpp
    llvm/lib/ProfileData/InstrProfReader.cpp
    llvm/lib/ProfileData/InstrProfWriter.cpp
    llvm/lib/ProfileData/MemProfReader.cpp
    llvm/test/tools/llvm-profdata/memprof-yaml.test
    llvm/tools/llvm-profdata/llvm-profdata.cpp
    llvm/unittests/ProfileData/DataAccessProfTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/DataAccessProf.h b/llvm/include/llvm/ProfileData/DataAccessProf.h
index 3cc8835a776dd..c0f0c6d9c9fc1 100644
--- a/llvm/include/llvm/ProfileData/DataAccessProf.h
+++ b/llvm/include/llvm/ProfileData/DataAccessProf.h
@@ -17,10 +17,8 @@
 #ifndef LLVM_PROFILEDATA_DATAACCESSPROF_H_
 #define LLVM_PROFILEDATA_DATAACCESSPROF_H_
 
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMapInfoVariant.h"
 #include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -35,12 +33,15 @@
 
 namespace llvm {
 
-namespace data_access_prof {
+namespace memprof {
 
 /// The location of data in the source code. Used by profile lookup API.
 struct SourceLocation {
   SourceLocation(StringRef FileNameRef, uint32_t Line)
       : FileName(FileNameRef.str()), Line(Line) {}
+
+  // Empty constructor is used in yaml conversion.
+  SourceLocation() {}
   /// The filename where the data is located.
   std::string FileName;
   /// The line number in the source code.
@@ -53,6 +54,8 @@ namespace internal {
 // which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData`
 // to represent data locations internally.
 struct SourceLocationRef {
+  SourceLocationRef(StringRef FileNameRef, uint32_t Line)
+      : FileName(FileNameRef), Line(Line) {}
   // The filename where the data is located.
   StringRef FileName;
   // The line number in the source code.
@@ -100,18 +103,21 @@ using SymbolHandle = std::variant<std::string, uint64_t>;
 /// The data access profiles for a symbol.
 struct DataAccessProfRecord {
 public:
-  DataAccessProfRecord(SymbolHandleRef SymHandleRef,
-                       ArrayRef<internal::SourceLocationRef> LocRefs) {
+  DataAccessProfRecord(SymbolHandleRef SymHandleRef, uint64_t AccessCount,
+                       ArrayRef<internal::SourceLocationRef> LocRefs)
+      : AccessCount(AccessCount) {
     if (std::holds_alternative<StringRef>(SymHandleRef)) {
       SymHandle = std::get<StringRef>(SymHandleRef).str();
     } else
       SymHandle = std::get<uint64_t>(SymHandleRef);
 
     for (auto Loc : LocRefs)
-      Locations.push_back(SourceLocation(Loc.FileName, Loc.Line));
+      Locations.emplace_back(Loc.FileName, Loc.Line);
   }
+  // Empty constructor is used in yaml conversion.
+  DataAccessProfRecord() : AccessCount(0) {}
   SymbolHandle SymHandle;
-
+  uint64_t AccessCount;
   // The locations of data in the source code. Optional.
   SmallVector<SourceLocation> Locations;
 };
@@ -208,7 +214,7 @@ class DataAccessProfData {
   llvm::SetVector<StringRef> KnownColdSymbols;
 };
 
-} // namespace data_access_prof
+} // namespace memprof
 } // namespace llvm
 
 #endif // LLVM_PROFILEDATA_DATAACCESSPROF_H_

diff  --git a/llvm/include/llvm/ProfileData/IndexedMemProfData.h b/llvm/include/llvm/ProfileData/IndexedMemProfData.h
index f33b160e0b6a9..2b40094a9bc21 100644
--- a/llvm/include/llvm/ProfileData/IndexedMemProfData.h
+++ b/llvm/include/llvm/ProfileData/IndexedMemProfData.h
@@ -15,9 +15,13 @@
 #ifndef LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H
 #define LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H
 
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/MemProf.h"
 
+#include <functional>
+#include <optional>
+
 namespace llvm {
 namespace memprof {
 struct IndexedMemProfData {
@@ -82,8 +86,10 @@ struct IndexedMemProfData {
 } // namespace memprof
 
 // Write the MemProf data to OS.
-Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
-                   memprof::IndexedVersion MemProfVersionRequested,
-                   bool MemProfFullSchema);
+Error writeMemProf(
+    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
+    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData);
+
 } // namespace llvm
 #endif

diff  --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index c250a9ede39bc..d104ab51430d1 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/ProfileSummary.h"
 #include "llvm/Object/BuildID.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/InstrProfCorrelator.h"
 #include "llvm/ProfileData/MemProf.h"
@@ -703,10 +704,13 @@ class IndexedMemProfReader {
   const unsigned char *CallStackBase = nullptr;
   // The number of elements in the radix tree array.
   unsigned RadixTreeSize = 0;
+  /// The data access profiles, deserialized from binary data.
+  std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
 
   Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
   Error deserializeRadixTreeBased(const unsigned char *Start,
-                                  const unsigned char *Ptr);
+                                  const unsigned char *Ptr,
+                                  memprof::IndexedVersion Version);
 
 public:
   IndexedMemProfReader() = default;

diff  --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index b72c901dbb5b2..cdb7afb623378 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/Object/BuildID.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/IndexedMemProfData.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/Error.h"
@@ -81,6 +82,8 @@ class InstrProfWriter {
   // Whether to generated random memprof hotness for testing.
   bool MemprofGenerateRandomHotness;
 
+  std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
+
 public:
   // For memprof testing, random hotness can be assigned to the contexts if
   // MemprofGenerateRandomHotness is enabled. The random seed can be either
@@ -122,6 +125,9 @@ class InstrProfWriter {
   // Add a binary id to the binary ids list.
   void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);
 
+  void addDataAccessProfData(
+      std::unique_ptr<memprof::DataAccessProfData> DataAccessProfile);
+
   /// Merge existing function counts from the given writer.
   void mergeRecordsFromWriter(InstrProfWriter &&IPW,
                               function_ref<void(Error)> Warn);

diff  --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 130493ec77c08..3bfcdf0f42cde 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -229,6 +229,20 @@ class YAMLMemProfReader final : public MemProfReader {
   create(std::unique_ptr<MemoryBuffer> Buffer);
 
   void parse(StringRef YAMLData);
+
+  std::unique_ptr<memprof::DataAccessProfData> takeDataAccessProfData() {
+    return std::move(DataAccessProfileData);
+  }
+
+private:
+  // Called by `parse` to set data access profiles after parsing them from Yaml
+  // files.
+  void
+  setDataAccessProfileData(std::unique_ptr<memprof::DataAccessProfData> Data) {
+    DataAccessProfileData = std::move(Data);
+  }
+
+  std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
 };
 } // namespace memprof
 } // namespace llvm

diff  --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h
index b642e3098aa0e..ad5d7c0e22751 100644
--- a/llvm/include/llvm/ProfileData/MemProfYAML.h
+++ b/llvm/include/llvm/ProfileData/MemProfYAML.h
@@ -2,6 +2,7 @@
 #define LLVM_PROFILEDATA_MEMPROFYAML_H_
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/YAMLTraits.h"
@@ -20,9 +21,24 @@ struct GUIDMemProfRecordPair {
   MemProfRecord Record;
 };
 
+// Helper struct to yamlify memprof::DataAccessProfData. The struct
+// members use owned strings. This is for simplicity and assumes that most real
+// world use cases do look-ups and regression test scale is small.
+struct YamlDataAccessProfData {
+  std::vector<memprof::DataAccessProfRecord> Records;
+  std::vector<uint64_t> KnownColdStrHashes;
+  std::vector<std::string> KnownColdSymbols;
+
+  bool isEmpty() const {
+    return Records.empty() && KnownColdStrHashes.empty() &&
+           KnownColdSymbols.empty();
+  }
+};
+
 // The top-level data structure, only used with YAML for now.
 struct AllMemProfData {
   std::vector<GUIDMemProfRecordPair> HeapProfileRecords;
+  YamlDataAccessProfData YamlifiedDataAccessProfiles;
 };
 } // namespace memprof
 
@@ -206,9 +222,52 @@ template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
   }
 };
 
+template <> struct MappingTraits<memprof::SourceLocation> {
+  static void mapping(IO &Io, memprof::SourceLocation &Loc) {
+    Io.mapOptional("FileName", Loc.FileName);
+    Io.mapOptional("Line", Loc.Line);
+  }
+};
+
+template <> struct MappingTraits<memprof::DataAccessProfRecord> {
+  static void mapping(IO &Io, memprof::DataAccessProfRecord &Rec) {
+    if (Io.outputting()) {
+      if (std::holds_alternative<std::string>(Rec.SymHandle)) {
+        Io.mapOptional("Symbol", std::get<std::string>(Rec.SymHandle));
+      } else {
+        Io.mapOptional("Hash", std::get<uint64_t>(Rec.SymHandle));
+      }
+    } else {
+      std::string SymName;
+      uint64_t Hash = 0;
+      Io.mapOptional("Symbol", SymName);
+      Io.mapOptional("Hash", Hash);
+      if (!SymName.empty()) {
+        Rec.SymHandle = SymName;
+      } else {
+        Rec.SymHandle = Hash;
+      }
+    }
+    Io.mapRequired("AccessCount", Rec.AccessCount);
+    Io.mapOptional("Locations", Rec.Locations);
+  }
+};
+
+template <> struct MappingTraits<memprof::YamlDataAccessProfData> {
+  static void mapping(IO &Io, memprof::YamlDataAccessProfData &Data) {
+    Io.mapOptional("SampledRecords", Data.Records);
+    Io.mapOptional("KnownColdSymbols", Data.KnownColdSymbols);
+    Io.mapOptional("KnownColdStrHashes", Data.KnownColdStrHashes);
+  }
+};
+
 template <> struct MappingTraits<memprof::AllMemProfData> {
   static void mapping(IO &Io, memprof::AllMemProfData &Data) {
     Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
+    // Map data access profiles if reading input, or if writing output &&
+    // the struct is populated.
+    if (!Io.outputting() || !Data.YamlifiedDataAccessProfiles.isEmpty())
+      Io.mapOptional("DataAccessProfiles", Data.YamlifiedDataAccessProfiles);
   }
 };
 
@@ -234,5 +293,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::DataAccessProfRecord)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::SourceLocation)
 
 #endif // LLVM_PROFILEDATA_MEMPROFYAML_H_

diff  --git a/llvm/lib/ProfileData/DataAccessProf.cpp b/llvm/lib/ProfileData/DataAccessProf.cpp
index a31f3db0621fb..090dcb3dcc1b9 100644
--- a/llvm/lib/ProfileData/DataAccessProf.cpp
+++ b/llvm/lib/ProfileData/DataAccessProf.cpp
@@ -11,7 +11,7 @@
 #include <sys/types.h>
 
 namespace llvm {
-namespace data_access_prof {
+namespace memprof {
 
 // If `Map` has an entry keyed by `Str`, returns the entry iterator. Otherwise,
 // creates an owned copy of `Str`, adds a map entry for it and returns the
@@ -48,7 +48,8 @@ DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const {
 
   auto It = Records.find(Key);
   if (It != Records.end()) {
-    return DataAccessProfRecord(Key, It->second.Locations);
+    return DataAccessProfRecord(Key, It->second.AccessCount,
+                                It->second.Locations);
   }
 
   return std::nullopt;
@@ -261,5 +262,5 @@ Error DataAccessProfData::deserializeRecords(const unsigned char *&Ptr) {
   }
   return Error::success();
 }
-} // namespace data_access_prof
+} // namespace memprof
 } // namespace llvm

diff  --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp
index 59e59720179af..7398e4c468bbe 100644
--- a/llvm/lib/ProfileData/IndexedMemProfData.cpp
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ProfileData/MemProf.h"
@@ -217,7 +218,9 @@ static Error writeMemProfV2(ProfOStream &OS,
 
 static Error writeMemProfRadixTreeBased(
     ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
-    memprof::IndexedVersion Version, bool MemProfFullSchema) {
+    memprof::IndexedVersion Version, bool MemProfFullSchema,
+    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData =
+        nullptr) {
   assert((Version == memprof::Version3 || Version == memprof::Version4) &&
          "Unsupported version for radix tree format");
 
@@ -226,6 +229,8 @@ static Error writeMemProfRadixTreeBased(
   OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
   OS.write(0ULL); // Reserve space for the memprof record payload offset.
   OS.write(0ULL); // Reserve space for the memprof record table offset.
+  if (Version >= memprof::Version4)
+    OS.write(0ULL); // Reserve space for the data access profile offset.
 
   auto Schema = memprof::getHotColdSchema();
   if (MemProfFullSchema)
@@ -252,17 +257,29 @@ static Error writeMemProfRadixTreeBased(
   uint64_t RecordTableOffset = writeMemProfRecords(
       OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes);
 
+  uint64_t DataAccessProfOffset = 0;
+  if (DataAccessProfileData != nullptr) {
+    assert(Version >= memprof::Version4 &&
+           "Data access profiles are added starting from v4");
+    DataAccessProfOffset = OS.tell();
+    if (Error E = DataAccessProfileData->serialize(OS))
+      return E;
+  }
+
   // Verify that the computation for the number of elements in the call stack
   // array works.
   assert(CallStackPayloadOffset +
              NumElements * sizeof(memprof::LinearFrameId) ==
          RecordPayloadOffset);
 
-  uint64_t Header[] = {
+  SmallVector<uint64_t, 4> Header = {
       CallStackPayloadOffset,
       RecordPayloadOffset,
       RecordTableOffset,
   };
+  if (Version >= memprof::Version4)
+    Header.push_back(DataAccessProfOffset);
+
   OS.patch({{HeaderUpdatePos, Header}});
 
   return Error::success();
@@ -277,24 +294,28 @@ static Error writeMemProfV3(ProfOStream &OS,
 }
 
 // Write out MemProf Version4
-static Error writeMemProfV4(ProfOStream &OS,
-                            memprof::IndexedMemProfData &MemProfData,
-                            bool MemProfFullSchema) {
+static Error writeMemProfV4(
+    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+    bool MemProfFullSchema,
+    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData) {
   return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version4,
-                                    MemProfFullSchema);
+                                    MemProfFullSchema,
+                                    std::move(DataAccessProfileData));
 }
 
 // Write out the MemProf data in a requested version.
-Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
-                   memprof::IndexedVersion MemProfVersionRequested,
-                   bool MemProfFullSchema) {
+Error writeMemProf(
+    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
+    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData) {
   switch (MemProfVersionRequested) {
   case memprof::Version2:
     return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
   case memprof::Version3:
     return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
   case memprof::Version4:
-    return writeMemProfV4(OS, MemProfData, MemProfFullSchema);
+    return writeMemProfV4(OS, MemProfData, MemProfFullSchema,
+                          std::move(DataAccessProfileData));
   }
 
   return make_error<InstrProfError>(
@@ -358,7 +379,10 @@ Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,
 }
 
 Error IndexedMemProfReader::deserializeRadixTreeBased(
-    const unsigned char *Start, const unsigned char *Ptr) {
+    const unsigned char *Start, const unsigned char *Ptr,
+    memprof::IndexedVersion Version) {
+  assert((Version == memprof::Version3 || Version == memprof::Version4) &&
+         "Unsupported version for radix tree format");
   // The offset in the stream right before invoking
   // CallStackTableGenerator.Emit.
   const uint64_t CallStackPayloadOffset =
@@ -370,6 +394,11 @@ Error IndexedMemProfReader::deserializeRadixTreeBased(
   const uint64_t RecordTableOffset =
       support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
 
+  uint64_t DataAccessProfOffset = 0;
+  if (Version == memprof::Version4)
+    DataAccessProfOffset =
+        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+
   // Read the schema.
   auto SchemaOr = memprof::readMemProfSchema(Ptr);
   if (!SchemaOr)
@@ -391,6 +420,15 @@ Error IndexedMemProfReader::deserializeRadixTreeBased(
       /*Payload=*/Start + RecordPayloadOffset,
       /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
 
+  assert((!DataAccessProfOffset || DataAccessProfOffset > RecordTableOffset) &&
+         "Data access profile is either empty or after the record table");
+  if (DataAccessProfOffset > RecordTableOffset) {
+    DataAccessProfileData = std::make_unique<memprof::DataAccessProfData>();
+    const unsigned char *DAPPtr = Start + DataAccessProfOffset;
+    if (Error E = DataAccessProfileData->deserialize(DAPPtr))
+      return E;
+  }
+
   return Error::success();
 }
 
@@ -424,7 +462,7 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
   case memprof::Version3:
   case memprof::Version4:
     // V3 and V4 share the same high-level structure (radix tree, linear IDs).
-    if (Error E = deserializeRadixTreeBased(Start, Ptr))
+    if (Error E = deserializeRadixTreeBased(Start, Ptr, Version))
       return E;
     break;
   }

diff  --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index a1eb08362087f..ab109cd5b13a7 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1552,6 +1552,20 @@ memprof::AllMemProfData IndexedMemProfReader::getAllMemProfData() const {
     Pair.Record = std::move(*Record);
     AllMemProfData.HeapProfileRecords.push_back(std::move(Pair));
   }
+  // Populate the data access profiles for yaml output.
+  if (DataAccessProfileData != nullptr) {
+    for (const auto &[SymHandleRef, RecordRef] :
+         DataAccessProfileData->getRecords())
+      AllMemProfData.YamlifiedDataAccessProfiles.Records.push_back(
+          memprof::DataAccessProfRecord(SymHandleRef, RecordRef.AccessCount,
+                                        RecordRef.Locations));
+    for (StringRef ColdSymbol : DataAccessProfileData->getKnownColdSymbols())
+      AllMemProfData.YamlifiedDataAccessProfiles.KnownColdSymbols.push_back(
+          ColdSymbol.str());
+    for (uint64_t Hash : DataAccessProfileData->getKnownColdHashes())
+      AllMemProfData.YamlifiedDataAccessProfiles.KnownColdStrHashes.push_back(
+          Hash);
+  }
   return AllMemProfData;
 }
 

diff  --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 2c6640eedebd9..039e1bc955cd4 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/ProfileSummary.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/IndexedMemProfData.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/ProfileCommon.h"
@@ -320,6 +321,11 @@ void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
   llvm::append_range(BinaryIds, BIs);
 }
 
+void InstrProfWriter::addDataAccessProfData(
+    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfDataIn) {
+  DataAccessProfileData = std::move(DataAccessProfDataIn);
+}
+
 void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
   assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
   assert(!Trace.FunctionNameRefs.empty());
@@ -605,8 +611,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   uint64_t MemProfSectionStart = 0;
   if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
     MemProfSectionStart = OS.tell();
-    if (auto E = writeMemProf(OS, MemProfData, MemProfVersionRequested,
-                              MemProfFullSchema))
+
+    if (auto E =
+            writeMemProf(OS, MemProfData, MemProfVersionRequested,
+                         MemProfFullSchema, std::move(DataAccessProfileData)))
+
       return E;
   }
 

diff  --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index d6bc4fdf5e448..9c723e495e7f9 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -37,6 +37,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 
@@ -822,6 +823,34 @@ void YAMLMemProfReader::parse(StringRef YAMLData) {
 
     MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord));
   }
+
+  if (Doc.YamlifiedDataAccessProfiles.isEmpty())
+    return;
+
+  auto ToSymHandleRef =
+      [](const memprof::SymbolHandle &Handle) -> memprof::SymbolHandleRef {
+    if (std::holds_alternative<std::string>(Handle))
+      return StringRef(std::get<std::string>(Handle));
+    return std::get<uint64_t>(Handle);
+  };
+
+  auto DataAccessProfileData = std::make_unique<memprof::DataAccessProfData>();
+  for (const auto &Record : Doc.YamlifiedDataAccessProfiles.Records)
+    if (Error E = DataAccessProfileData->setDataAccessProfile(
+            ToSymHandleRef(Record.SymHandle), Record.AccessCount,
+            Record.Locations))
+      reportFatalInternalError(std::move(E));
+
+  for (const uint64_t Hash : Doc.YamlifiedDataAccessProfiles.KnownColdStrHashes)
+    if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Hash))
+      reportFatalInternalError(std::move(E));
+
+  for (const std::string &Sym :
+       Doc.YamlifiedDataAccessProfiles.KnownColdSymbols)
+    if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Sym))
+      reportFatalInternalError(std::move(E));
+
+  setDataAccessProfileData(std::move(DataAccessProfileData));
 }
 } // namespace memprof
 } // namespace llvm

diff  --git a/llvm/test/tools/llvm-profdata/memprof-yaml.test b/llvm/test/tools/llvm-profdata/memprof-yaml.test
index 9766cc50f37d7..0caa1fe5d9fd3 100644
--- a/llvm/test/tools/llvm-profdata/memprof-yaml.test
+++ b/llvm/test/tools/llvm-profdata/memprof-yaml.test
@@ -1,12 +1,103 @@
 ; RUN: split-file %s %t
 ; COM: The text format only supports the latest version.
+
+; Verify that the YAML output is identical to the YAML input.
+; memprof-in.yaml has both heap profile records and data access profiles.
 ; RUN: llvm-profdata merge --memprof-version=4 %t/memprof-in.yaml -o %t/memprof-out.indexed
 ; RUN: llvm-profdata show --memory %t/memprof-out.indexed > %t/memprof-out.yaml
 ; RUN: diff -b %t/memprof-in.yaml %t/memprof-out.yaml
 
-; Verify that the YAML output is identical to the YAML input.
+; Merge text profile as v3 binary profile. Test that the merged v3 profile
+; are identical to memprof-in-v3.yaml, and doesn't have callee guids or dap.
+; RUN: llvm-profdata merge --memprof-version=3 %t/memprof-in.yaml -o %t/memprof-out-v3.indexed
+; RUN: llvm-profdata show --memory %t/memprof-out-v3.indexed > %t/memprof-out-v3.yaml
+; RUN: diff -b %t/memprof-out-v3.yaml %t/memprof-in-v3.yaml
+
+; memprof-in-no-dap.yaml has empty data access profiles.
+; RUN: llvm-profdata merge --memprof-version=4 %t/memprof-in-no-dap.yaml -o %t/memprof-out.indexed
+; RUN: llvm-profdata show --memory %t/memprof-out.indexed > %t/memprof-out-no-dap.yaml
+; RUN: diff -b %t/memprof-in-no-dap.yaml %t/memprof-out-no-dap.yaml
+
 ;--- memprof-in.yaml
 ---
+HeapProfileRecords:
+  - GUID:            0xdeadbeef12345678
+    AllocSites:
+      - Callstack:
+          - { Function: 0x1111111111111111, LineOffset: 11, Column: 10, IsInlineFrame: true }
+          - { Function: 0x2222222222222222, LineOffset: 22, Column: 20, IsInlineFrame: false }
+        MemInfoBlock:
+          AllocCount:      111
+          TotalSize:       222
+          TotalLifetime:   333
+          TotalLifetimeAccessDensity: 444
+      - Callstack:
+          - { Function: 0x3333333333333333, LineOffset: 33, Column: 30, IsInlineFrame: false }
+          - { Function: 0x4444444444444444, LineOffset: 44, Column: 40, IsInlineFrame: true }
+        MemInfoBlock:
+          AllocCount:      555
+          TotalSize:       666
+          TotalLifetime:   777
+          TotalLifetimeAccessDensity: 888
+    CallSites:
+      - Frames:
+        - { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
+        - { Function: 0x6666666666666666, LineOffset: 66, Column: 60, IsInlineFrame: false }
+        CalleeGuids: [ 0x100, 0x200 ]
+      - Frames:
+        - { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true }
+        - { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false }
+        CalleeGuids: [ 0x300 ]
+DataAccessProfiles:
+  SampledRecords:
+    - Symbol:          abcde
+      AccessCount:     100
+      Locations:
+      - FileName:      file2.h
+        Line:          123
+      - FileName:      file3.cpp
+        Line:          456
+    - Hash:            101010
+      AccessCount:     200
+      Locations:
+        - FileName:        file.cpp
+          Line:            233
+  KnownColdSymbols:
+    - foo
+    - bar
+  KnownColdStrHashes: [ 999, 1001 ]
+...
+;--- memprof-in-v3.yaml
+---
+HeapProfileRecords:
+  - GUID:            0xdeadbeef12345678
+    AllocSites:
+      - Callstack:
+          - { Function: 0x1111111111111111, LineOffset: 11, Column: 10, IsInlineFrame: true }
+          - { Function: 0x2222222222222222, LineOffset: 22, Column: 20, IsInlineFrame: false }
+        MemInfoBlock:
+          AllocCount:      111
+          TotalSize:       222
+          TotalLifetime:   333
+          TotalLifetimeAccessDensity: 444
+      - Callstack:
+          - { Function: 0x3333333333333333, LineOffset: 33, Column: 30, IsInlineFrame: false }
+          - { Function: 0x4444444444444444, LineOffset: 44, Column: 40, IsInlineFrame: true }
+        MemInfoBlock:
+          AllocCount:      555
+          TotalSize:       666
+          TotalLifetime:   777
+          TotalLifetimeAccessDensity: 888
+    CallSites:
+      - Frames:
+        - { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
+        - { Function: 0x6666666666666666, LineOffset: 66, Column: 60, IsInlineFrame: false }
+      - Frames:
+        - { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true }
+        - { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false }
+...
+;--- memprof-in-no-dap.yaml
+---
 HeapProfileRecords:
   - GUID:            0xdeadbeef12345678
     AllocSites:

diff  --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 885e06df6c390..8660eed6be2bf 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Debuginfod/HTTPClient.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Object/Binary.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/InstrProfCorrelator.h"
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ProfileData/InstrProfWriter.h"
@@ -756,6 +757,8 @@ loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
 
     auto MemProfData = Reader->takeMemProfData();
 
+    auto DataAccessProfData = Reader->takeDataAccessProfData();
+
     // Check for the empty input in case the YAML file is invalid.
     if (MemProfData.Records.empty()) {
       WC->Errors.emplace_back(
@@ -764,6 +767,7 @@ loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
     }
 
     WC->Writer.addMemProfData(std::move(MemProfData), MemProfError);
+    WC->Writer.addDataAccessProfData(std::move(DataAccessProfData));
     return;
   }
 

diff  --git a/llvm/unittests/ProfileData/DataAccessProfTest.cpp b/llvm/unittests/ProfileData/DataAccessProfTest.cpp
index 8866c16fe292a..13af3390557d7 100644
--- a/llvm/unittests/ProfileData/DataAccessProfTest.cpp
+++ b/llvm/unittests/ProfileData/DataAccessProfTest.cpp
@@ -14,7 +14,7 @@
 #include "gtest/gtest.h"
 
 namespace llvm {
-namespace data_access_prof {
+namespace memprof {
 namespace {
 
 using ::llvm::StringRef;
@@ -177,5 +177,5 @@ TEST(MemProf, DataAccessProfile) {
   }
 }
 } // namespace
-} // namespace data_access_prof
+} // namespace memprof
 } // namespace llvm


        


More information about the llvm-commits mailing list