[llvm-branch-commits] [llvm] 0c0257e - Revert "Revert "[StaticDataLayout][PGO]Implement reader and writer change for…"

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu May 22 17:22:20 PDT 2025


Author: Mingming Liu
Date: 2025-05-22T17:22:16-07:00
New Revision: 0c0257e6243af40acf9ba1a86373398a202a533f

URL: https://github.com/llvm/llvm-project/commit/0c0257e6243af40acf9ba1a86373398a202a533f
DIFF: https://github.com/llvm/llvm-project/commit/0c0257e6243af40acf9ba1a86373398a202a533f.diff

LOG: Revert "Revert "[StaticDataLayout][PGO]Implement reader and writer change for…"

This reverts commit d0acddbdd615f1503e88903570ee7484bd217615.

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/DataAccessProf.h
    llvm/include/llvm/ProfileData/IndexedMemProfData.h
    llvm/include/llvm/ProfileData/InstrProfReader.h
    llvm/include/llvm/ProfileData/InstrProfWriter.h
    llvm/include/llvm/ProfileData/MemProfReader.h
    llvm/include/llvm/ProfileData/MemProfYAML.h
    llvm/lib/ProfileData/DataAccessProf.cpp
    llvm/lib/ProfileData/IndexedMemProfData.cpp
    llvm/lib/ProfileData/InstrProfReader.cpp
    llvm/lib/ProfileData/InstrProfWriter.cpp
    llvm/lib/ProfileData/MemProfReader.cpp
    llvm/test/tools/llvm-profdata/memprof-yaml.test
    llvm/tools/llvm-profdata/llvm-profdata.cpp
    llvm/unittests/ProfileData/DataAccessProfTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/DataAccessProf.h b/llvm/include/llvm/ProfileData/DataAccessProf.h
index 3cc8835a776dd..cd4b200486a3f 100644
--- a/llvm/include/llvm/ProfileData/DataAccessProf.h
+++ b/llvm/include/llvm/ProfileData/DataAccessProf.h
@@ -17,10 +17,8 @@
 #ifndef LLVM_PROFILEDATA_DATAACCESSPROF_H_
 #define LLVM_PROFILEDATA_DATAACCESSPROF_H_
 
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMapInfoVariant.h"
 #include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -35,12 +33,15 @@
 
 namespace llvm {
 
-namespace data_access_prof {
+namespace memprof {
 
 /// The location of data in the source code. Used by profile lookup API.
 struct SourceLocation {
   SourceLocation(StringRef FileNameRef, uint32_t Line)
       : FileName(FileNameRef.str()), Line(Line) {}
+
+  // Empty constructor is used in yaml conversion.
+  SourceLocation() {}
   /// The filename where the data is located.
   std::string FileName;
   /// The line number in the source code.
@@ -53,6 +54,8 @@ namespace internal {
 // which strings are owned by `DataAccessProfData`. Used by `DataAccessProfData`
 // to represent data locations internally.
 struct SourceLocationRef {
+  SourceLocationRef(StringRef FileNameRef, uint32_t Line)
+      : FileName(FileNameRef), Line(Line) {}
   // The filename where the data is located.
   StringRef FileName;
   // The line number in the source code.
@@ -100,18 +103,21 @@ using SymbolHandle = std::variant<std::string, uint64_t>;
 /// The data access profiles for a symbol.
 struct DataAccessProfRecord {
 public:
-  DataAccessProfRecord(SymbolHandleRef SymHandleRef,
-                       ArrayRef<internal::SourceLocationRef> LocRefs) {
+  DataAccessProfRecord(SymbolHandleRef SymHandleRef, uint64_t AccessCount,
+                       ArrayRef<internal::SourceLocationRef> LocRefs)
+      : AccessCount(AccessCount) {
     if (std::holds_alternative<StringRef>(SymHandleRef)) {
       SymHandle = std::get<StringRef>(SymHandleRef).str();
     } else
       SymHandle = std::get<uint64_t>(SymHandleRef);
 
     for (auto Loc : LocRefs)
-      Locations.push_back(SourceLocation(Loc.FileName, Loc.Line));
+      Locations.emplace_back(Loc.FileName, Loc.Line);
   }
+  // Empty constructor is used in yaml conversion.
+  DataAccessProfRecord() {}
   SymbolHandle SymHandle;
-
+  uint64_t AccessCount;
   // The locations of data in the source code. Optional.
   SmallVector<SourceLocation> Locations;
 };
@@ -208,7 +214,7 @@ class DataAccessProfData {
   llvm::SetVector<StringRef> KnownColdSymbols;
 };
 
-} // namespace data_access_prof
+} // namespace memprof
 } // namespace llvm
 
 #endif // LLVM_PROFILEDATA_DATAACCESSPROF_H_

diff  --git a/llvm/include/llvm/ProfileData/IndexedMemProfData.h b/llvm/include/llvm/ProfileData/IndexedMemProfData.h
index f33b160e0b6a9..2b40094a9bc21 100644
--- a/llvm/include/llvm/ProfileData/IndexedMemProfData.h
+++ b/llvm/include/llvm/ProfileData/IndexedMemProfData.h
@@ -15,9 +15,13 @@
 #ifndef LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H
 #define LLVM_PROFILEDATA_INDEXEDMEMPROFDATA_H
 
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/MemProf.h"
 
+#include <functional>
+#include <optional>
+
 namespace llvm {
 namespace memprof {
 struct IndexedMemProfData {
@@ -82,8 +86,10 @@ struct IndexedMemProfData {
 } // namespace memprof
 
 // Write the MemProf data to OS.
-Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
-                   memprof::IndexedVersion MemProfVersionRequested,
-                   bool MemProfFullSchema);
+Error writeMemProf(
+    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
+    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData);
+
 } // namespace llvm
 #endif

diff  --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index c250a9ede39bc..d104ab51430d1 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/ProfileSummary.h"
 #include "llvm/Object/BuildID.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/InstrProfCorrelator.h"
 #include "llvm/ProfileData/MemProf.h"
@@ -703,10 +704,13 @@ class IndexedMemProfReader {
   const unsigned char *CallStackBase = nullptr;
   // The number of elements in the radix tree array.
   unsigned RadixTreeSize = 0;
+  /// The data access profiles, deserialized from binary data.
+  std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
 
   Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
   Error deserializeRadixTreeBased(const unsigned char *Start,
-                                  const unsigned char *Ptr);
+                                  const unsigned char *Ptr,
+                                  memprof::IndexedVersion Version);
 
 public:
   IndexedMemProfReader() = default;

diff  --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index b72c901dbb5b2..cdb7afb623378 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/Object/BuildID.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/IndexedMemProfData.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/Error.h"
@@ -81,6 +82,8 @@ class InstrProfWriter {
   // Whether to generated random memprof hotness for testing.
   bool MemprofGenerateRandomHotness;
 
+  std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
+
 public:
   // For memprof testing, random hotness can be assigned to the contexts if
   // MemprofGenerateRandomHotness is enabled. The random seed can be either
@@ -122,6 +125,9 @@ class InstrProfWriter {
   // Add a binary id to the binary ids list.
   void addBinaryIds(ArrayRef<llvm::object::BuildID> BIs);
 
+  void addDataAccessProfData(
+      std::unique_ptr<memprof::DataAccessProfData> DataAccessProfile);
+
   /// Merge existing function counts from the given writer.
   void mergeRecordsFromWriter(InstrProfWriter &&IPW,
                               function_ref<void(Error)> Warn);

diff  --git a/llvm/include/llvm/ProfileData/MemProfReader.h b/llvm/include/llvm/ProfileData/MemProfReader.h
index 130493ec77c08..3bfcdf0f42cde 100644
--- a/llvm/include/llvm/ProfileData/MemProfReader.h
+++ b/llvm/include/llvm/ProfileData/MemProfReader.h
@@ -229,6 +229,20 @@ class YAMLMemProfReader final : public MemProfReader {
   create(std::unique_ptr<MemoryBuffer> Buffer);
 
   void parse(StringRef YAMLData);
+
+  std::unique_ptr<memprof::DataAccessProfData> takeDataAccessProfData() {
+    return std::move(DataAccessProfileData);
+  }
+
+private:
+  // Called by `parse` to set data access profiles after parsing them from Yaml
+  // files.
+  void
+  setDataAccessProfileData(std::unique_ptr<memprof::DataAccessProfData> Data) {
+    DataAccessProfileData = std::move(Data);
+  }
+
+  std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData;
 };
 } // namespace memprof
 } // namespace llvm

diff  --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h
index b642e3098aa0e..f8dc659f66662 100644
--- a/llvm/include/llvm/ProfileData/MemProfYAML.h
+++ b/llvm/include/llvm/ProfileData/MemProfYAML.h
@@ -2,6 +2,7 @@
 #define LLVM_PROFILEDATA_MEMPROFYAML_H_
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/YAMLTraits.h"
@@ -20,9 +21,24 @@ struct GUIDMemProfRecordPair {
   MemProfRecord Record;
 };
 
+// Helper struct to yamlify memprof::DataAccessProfData. The struct
+// members use owned strings. This is for simplicity and assumes that most real
+// world use cases do look-ups and regression test scale is small.
+struct YamlDataAccessProfData {
+  std::vector<memprof::DataAccessProfRecord> Records;
+  std::vector<uint64_t> KnownColdStrHashes;
+  std::vector<std::string> KnownColdSymbols;
+
+  bool isEmpty() const {
+    return Records.empty() && KnownColdStrHashes.empty() &&
+           KnownColdSymbols.empty();
+  }
+};
+
 // The top-level data structure, only used with YAML for now.
 struct AllMemProfData {
   std::vector<GUIDMemProfRecordPair> HeapProfileRecords;
+  YamlDataAccessProfData YamlifiedDataAccessProfiles;
 };
 } // namespace memprof
 
@@ -206,9 +222,52 @@ template <> struct MappingTraits<memprof::GUIDMemProfRecordPair> {
   }
 };
 
+template <> struct MappingTraits<memprof::SourceLocation> {
+  static void mapping(IO &Io, memprof::SourceLocation &Loc) {
+    Io.mapOptional("FileName", Loc.FileName);
+    Io.mapOptional("Line", Loc.Line);
+  }
+};
+
+template <> struct MappingTraits<memprof::DataAccessProfRecord> {
+  static void mapping(IO &Io, memprof::DataAccessProfRecord &Rec) {
+    if (Io.outputting()) {
+      if (std::holds_alternative<std::string>(Rec.SymHandle)) {
+        Io.mapOptional("Symbol", std::get<std::string>(Rec.SymHandle));
+      } else {
+        Io.mapOptional("Hash", std::get<uint64_t>(Rec.SymHandle));
+      }
+    } else {
+      std::string SymName;
+      uint64_t Hash = 0;
+      Io.mapOptional("Symbol", SymName);
+      Io.mapOptional("Hash", Hash);
+      if (!SymName.empty()) {
+        Rec.SymHandle = SymName;
+      } else {
+        Rec.SymHandle = Hash;
+      }
+    }
+
+    Io.mapOptional("Locations", Rec.Locations);
+  }
+};
+
+template <> struct MappingTraits<memprof::YamlDataAccessProfData> {
+  static void mapping(IO &Io, memprof::YamlDataAccessProfData &Data) {
+    Io.mapOptional("SampledRecords", Data.Records);
+    Io.mapOptional("KnownColdSymbols", Data.KnownColdSymbols);
+    Io.mapOptional("KnownColdStrHashes", Data.KnownColdStrHashes);
+  }
+};
+
 template <> struct MappingTraits<memprof::AllMemProfData> {
   static void mapping(IO &Io, memprof::AllMemProfData &Data) {
     Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
+    // Map data access profiles if reading input, or if writing output &&
+    // the struct is populated.
+    if (!Io.outputting() || !Data.YamlifiedDataAccessProfiles.isEmpty())
+      Io.mapOptional("DataAccessProfiles", Data.YamlifiedDataAccessProfiles);
   }
 };
 
@@ -234,5 +293,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
 LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::DataAccessProfRecord)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::SourceLocation)
 
 #endif // LLVM_PROFILEDATA_MEMPROFYAML_H_

diff  --git a/llvm/lib/ProfileData/DataAccessProf.cpp b/llvm/lib/ProfileData/DataAccessProf.cpp
index a31f3db0621fb..090dcb3dcc1b9 100644
--- a/llvm/lib/ProfileData/DataAccessProf.cpp
+++ b/llvm/lib/ProfileData/DataAccessProf.cpp
@@ -11,7 +11,7 @@
 #include <sys/types.h>
 
 namespace llvm {
-namespace data_access_prof {
+namespace memprof {
 
 // If `Map` has an entry keyed by `Str`, returns the entry iterator. Otherwise,
 // creates an owned copy of `Str`, adds a map entry for it and returns the
@@ -48,7 +48,8 @@ DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const {
 
   auto It = Records.find(Key);
   if (It != Records.end()) {
-    return DataAccessProfRecord(Key, It->second.Locations);
+    return DataAccessProfRecord(Key, It->second.AccessCount,
+                                It->second.Locations);
   }
 
   return std::nullopt;
@@ -261,5 +262,5 @@ Error DataAccessProfData::deserializeRecords(const unsigned char *&Ptr) {
   }
   return Error::success();
 }
-} // namespace data_access_prof
+} // namespace memprof
 } // namespace llvm

diff  --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp
index 59e59720179af..7398e4c468bbe 100644
--- a/llvm/lib/ProfileData/IndexedMemProfData.cpp
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ProfileData/MemProf.h"
@@ -217,7 +218,9 @@ static Error writeMemProfV2(ProfOStream &OS,
 
 static Error writeMemProfRadixTreeBased(
     ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
-    memprof::IndexedVersion Version, bool MemProfFullSchema) {
+    memprof::IndexedVersion Version, bool MemProfFullSchema,
+    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData =
+        nullptr) {
   assert((Version == memprof::Version3 || Version == memprof::Version4) &&
          "Unsupported version for radix tree format");
 
@@ -226,6 +229,8 @@ static Error writeMemProfRadixTreeBased(
   OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
   OS.write(0ULL); // Reserve space for the memprof record payload offset.
   OS.write(0ULL); // Reserve space for the memprof record table offset.
+  if (Version >= memprof::Version4)
+    OS.write(0ULL); // Reserve space for the data access profile offset.
 
   auto Schema = memprof::getHotColdSchema();
   if (MemProfFullSchema)
@@ -252,17 +257,29 @@ static Error writeMemProfRadixTreeBased(
   uint64_t RecordTableOffset = writeMemProfRecords(
       OS, MemProfData.Records, &Schema, Version, &MemProfCallStackIndexes);
 
+  uint64_t DataAccessProfOffset = 0;
+  if (DataAccessProfileData != nullptr) {
+    assert(Version >= memprof::Version4 &&
+           "Data access profiles are added starting from v4");
+    DataAccessProfOffset = OS.tell();
+    if (Error E = DataAccessProfileData->serialize(OS))
+      return E;
+  }
+
   // Verify that the computation for the number of elements in the call stack
   // array works.
   assert(CallStackPayloadOffset +
              NumElements * sizeof(memprof::LinearFrameId) ==
          RecordPayloadOffset);
 
-  uint64_t Header[] = {
+  SmallVector<uint64_t, 4> Header = {
       CallStackPayloadOffset,
       RecordPayloadOffset,
       RecordTableOffset,
   };
+  if (Version >= memprof::Version4)
+    Header.push_back(DataAccessProfOffset);
+
   OS.patch({{HeaderUpdatePos, Header}});
 
   return Error::success();
@@ -277,24 +294,28 @@ static Error writeMemProfV3(ProfOStream &OS,
 }
 
 // Write out MemProf Version4
-static Error writeMemProfV4(ProfOStream &OS,
-                            memprof::IndexedMemProfData &MemProfData,
-                            bool MemProfFullSchema) {
+static Error writeMemProfV4(
+    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+    bool MemProfFullSchema,
+    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData) {
   return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version4,
-                                    MemProfFullSchema);
+                                    MemProfFullSchema,
+                                    std::move(DataAccessProfileData));
 }
 
 // Write out the MemProf data in a requested version.
-Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
-                   memprof::IndexedVersion MemProfVersionRequested,
-                   bool MemProfFullSchema) {
+Error writeMemProf(
+    ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
+    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfileData) {
   switch (MemProfVersionRequested) {
   case memprof::Version2:
     return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
   case memprof::Version3:
     return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
   case memprof::Version4:
-    return writeMemProfV4(OS, MemProfData, MemProfFullSchema);
+    return writeMemProfV4(OS, MemProfData, MemProfFullSchema,
+                          std::move(DataAccessProfileData));
   }
 
   return make_error<InstrProfError>(
@@ -358,7 +379,10 @@ Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,
 }
 
 Error IndexedMemProfReader::deserializeRadixTreeBased(
-    const unsigned char *Start, const unsigned char *Ptr) {
+    const unsigned char *Start, const unsigned char *Ptr,
+    memprof::IndexedVersion Version) {
+  assert((Version == memprof::Version3 || Version == memprof::Version4) &&
+         "Unsupported version for radix tree format");
   // The offset in the stream right before invoking
   // CallStackTableGenerator.Emit.
   const uint64_t CallStackPayloadOffset =
@@ -370,6 +394,11 @@ Error IndexedMemProfReader::deserializeRadixTreeBased(
   const uint64_t RecordTableOffset =
       support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
 
+  uint64_t DataAccessProfOffset = 0;
+  if (Version == memprof::Version4)
+    DataAccessProfOffset =
+        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+
   // Read the schema.
   auto SchemaOr = memprof::readMemProfSchema(Ptr);
   if (!SchemaOr)
@@ -391,6 +420,15 @@ Error IndexedMemProfReader::deserializeRadixTreeBased(
       /*Payload=*/Start + RecordPayloadOffset,
       /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
 
+  assert((!DataAccessProfOffset || DataAccessProfOffset > RecordTableOffset) &&
+         "Data access profile is either empty or after the record table");
+  if (DataAccessProfOffset > RecordTableOffset) {
+    DataAccessProfileData = std::make_unique<memprof::DataAccessProfData>();
+    const unsigned char *DAPPtr = Start + DataAccessProfOffset;
+    if (Error E = DataAccessProfileData->deserialize(DAPPtr))
+      return E;
+  }
+
   return Error::success();
 }
 
@@ -424,7 +462,7 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
   case memprof::Version3:
   case memprof::Version4:
     // V3 and V4 share the same high-level structure (radix tree, linear IDs).
-    if (Error E = deserializeRadixTreeBased(Start, Ptr))
+    if (Error E = deserializeRadixTreeBased(Start, Ptr, Version))
       return E;
     break;
   }

diff  --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index a1eb08362087f..ab109cd5b13a7 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1552,6 +1552,20 @@ memprof::AllMemProfData IndexedMemProfReader::getAllMemProfData() const {
     Pair.Record = std::move(*Record);
     AllMemProfData.HeapProfileRecords.push_back(std::move(Pair));
   }
+  // Populate the data access profiles for yaml output.
+  if (DataAccessProfileData != nullptr) {
+    for (const auto &[SymHandleRef, RecordRef] :
+         DataAccessProfileData->getRecords())
+      AllMemProfData.YamlifiedDataAccessProfiles.Records.push_back(
+          memprof::DataAccessProfRecord(SymHandleRef, RecordRef.AccessCount,
+                                        RecordRef.Locations));
+    for (StringRef ColdSymbol : DataAccessProfileData->getKnownColdSymbols())
+      AllMemProfData.YamlifiedDataAccessProfiles.KnownColdSymbols.push_back(
+          ColdSymbol.str());
+    for (uint64_t Hash : DataAccessProfileData->getKnownColdHashes())
+      AllMemProfData.YamlifiedDataAccessProfiles.KnownColdStrHashes.push_back(
+          Hash);
+  }
   return AllMemProfData;
 }
 

diff  --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 2c6640eedebd9..039e1bc955cd4 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/ProfileSummary.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/IndexedMemProfData.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/ProfileCommon.h"
@@ -320,6 +321,11 @@ void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
   llvm::append_range(BinaryIds, BIs);
 }
 
+void InstrProfWriter::addDataAccessProfData(
+    std::unique_ptr<memprof::DataAccessProfData> DataAccessProfDataIn) {
+  DataAccessProfileData = std::move(DataAccessProfDataIn);
+}
+
 void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
   assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength);
   assert(!Trace.FunctionNameRefs.empty());
@@ -605,8 +611,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   uint64_t MemProfSectionStart = 0;
   if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) {
     MemProfSectionStart = OS.tell();
-    if (auto E = writeMemProf(OS, MemProfData, MemProfVersionRequested,
-                              MemProfFullSchema))
+
+    if (auto E =
+            writeMemProf(OS, MemProfData, MemProfVersionRequested,
+                         MemProfFullSchema, std::move(DataAccessProfileData)))
+
       return E;
   }
 

diff  --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp
index d6bc4fdf5e448..9c723e495e7f9 100644
--- a/llvm/lib/ProfileData/MemProfReader.cpp
+++ b/llvm/lib/ProfileData/MemProfReader.cpp
@@ -37,6 +37,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 
@@ -822,6 +823,34 @@ void YAMLMemProfReader::parse(StringRef YAMLData) {
 
     MemProfData.Records.try_emplace(GUID, std::move(IndexedRecord));
   }
+
+  if (Doc.YamlifiedDataAccessProfiles.isEmpty())
+    return;
+
+  auto ToSymHandleRef =
+      [](const memprof::SymbolHandle &Handle) -> memprof::SymbolHandleRef {
+    if (std::holds_alternative<std::string>(Handle))
+      return StringRef(std::get<std::string>(Handle));
+    return std::get<uint64_t>(Handle);
+  };
+
+  auto DataAccessProfileData = std::make_unique<memprof::DataAccessProfData>();
+  for (const auto &Record : Doc.YamlifiedDataAccessProfiles.Records)
+    if (Error E = DataAccessProfileData->setDataAccessProfile(
+            ToSymHandleRef(Record.SymHandle), Record.AccessCount,
+            Record.Locations))
+      reportFatalInternalError(std::move(E));
+
+  for (const uint64_t Hash : Doc.YamlifiedDataAccessProfiles.KnownColdStrHashes)
+    if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Hash))
+      reportFatalInternalError(std::move(E));
+
+  for (const std::string &Sym :
+       Doc.YamlifiedDataAccessProfiles.KnownColdSymbols)
+    if (Error E = DataAccessProfileData->addKnownSymbolWithoutSamples(Sym))
+      reportFatalInternalError(std::move(E));
+
+  setDataAccessProfileData(std::move(DataAccessProfileData));
 }
 } // namespace memprof
 } // namespace llvm

diff  --git a/llvm/test/tools/llvm-profdata/memprof-yaml.test b/llvm/test/tools/llvm-profdata/memprof-yaml.test
index 9766cc50f37d7..5beda8c036a12 100644
--- a/llvm/test/tools/llvm-profdata/memprof-yaml.test
+++ b/llvm/test/tools/llvm-profdata/memprof-yaml.test
@@ -1,12 +1,101 @@
 ; RUN: split-file %s %t
 ; COM: The text format only supports the latest version.
+
+; Verify that the YAML output is identical to the YAML input.
+; memprof-in.yaml has both heap profile records and data access profiles.
 ; RUN: llvm-profdata merge --memprof-version=4 %t/memprof-in.yaml -o %t/memprof-out.indexed
 ; RUN: llvm-profdata show --memory %t/memprof-out.indexed > %t/memprof-out.yaml
 ; RUN: 
diff  -b %t/memprof-in.yaml %t/memprof-out.yaml
 
-; Verify that the YAML output is identical to the YAML input.
+; Merge text profile as v3 binary profile. Test that the merged v3 profile
+; are identical to memprof-in-v3.yaml, and doesn't have callee guids or dap.
+; RUN: llvm-profdata merge --memprof-version=3 %t/memprof-in.yaml -o %t/memprof-out-v3.indexed
+; RUN: llvm-profdata show --memory %t/memprof-out-v3.indexed > %t/memprof-out-v3.yaml
+; RUN: 
diff  -b %t/memprof-out-v3.yaml %t/memprof-in-v3.yaml
+
+; memprof-in-no-dap.yaml has empty data access profiles.
+; RUN: llvm-profdata merge --memprof-version=4 %t/memprof-in-no-dap.yaml -o %t/memprof-out.indexed
+; RUN: llvm-profdata show --memory %t/memprof-out.indexed > %t/memprof-out-no-dap.yaml
+; RUN: 
diff  -b %t/memprof-in-no-dap.yaml %t/memprof-out-no-dap.yaml
+
 ;--- memprof-in.yaml
 ---
+HeapProfileRecords:
+  - GUID:            0xdeadbeef12345678
+    AllocSites:
+      - Callstack:
+          - { Function: 0x1111111111111111, LineOffset: 11, Column: 10, IsInlineFrame: true }
+          - { Function: 0x2222222222222222, LineOffset: 22, Column: 20, IsInlineFrame: false }
+        MemInfoBlock:
+          AllocCount:      111
+          TotalSize:       222
+          TotalLifetime:   333
+          TotalLifetimeAccessDensity: 444
+      - Callstack:
+          - { Function: 0x3333333333333333, LineOffset: 33, Column: 30, IsInlineFrame: false }
+          - { Function: 0x4444444444444444, LineOffset: 44, Column: 40, IsInlineFrame: true }
+        MemInfoBlock:
+          AllocCount:      555
+          TotalSize:       666
+          TotalLifetime:   777
+          TotalLifetimeAccessDensity: 888
+    CallSites:
+      - Frames:
+        - { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
+        - { Function: 0x6666666666666666, LineOffset: 66, Column: 60, IsInlineFrame: false }
+        CalleeGuids: [ 0x100, 0x200 ]
+      - Frames:
+        - { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true }
+        - { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false }
+        CalleeGuids: [ 0x300 ]
+DataAccessProfiles:
+  SampledRecords:
+    - Symbol:          abcde
+      Locations:
+      - FileName:      file2.h
+        Line:          123
+      - FileName:      file3.cpp
+        Line:          456
+    - Hash:            101010
+      Locations:
+        - FileName:        file.cpp
+          Line:            233
+  KnownColdSymbols:
+    - foo
+    - bar
+  KnownColdStrHashes: [ 999, 1001 ]
+...
+;--- memprof-in-v3.yaml
+---
+HeapProfileRecords:
+  - GUID:            0xdeadbeef12345678
+    AllocSites:
+      - Callstack:
+          - { Function: 0x1111111111111111, LineOffset: 11, Column: 10, IsInlineFrame: true }
+          - { Function: 0x2222222222222222, LineOffset: 22, Column: 20, IsInlineFrame: false }
+        MemInfoBlock:
+          AllocCount:      111
+          TotalSize:       222
+          TotalLifetime:   333
+          TotalLifetimeAccessDensity: 444
+      - Callstack:
+          - { Function: 0x3333333333333333, LineOffset: 33, Column: 30, IsInlineFrame: false }
+          - { Function: 0x4444444444444444, LineOffset: 44, Column: 40, IsInlineFrame: true }
+        MemInfoBlock:
+          AllocCount:      555
+          TotalSize:       666
+          TotalLifetime:   777
+          TotalLifetimeAccessDensity: 888
+    CallSites:
+      - Frames:
+        - { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
+        - { Function: 0x6666666666666666, LineOffset: 66, Column: 60, IsInlineFrame: false }
+      - Frames:
+        - { Function: 0x7777777777777777, LineOffset: 77, Column: 70, IsInlineFrame: true }
+        - { Function: 0x8888888888888888, LineOffset: 88, Column: 80, IsInlineFrame: false }
+...
+;--- memprof-in-no-dap.yaml
+---
 HeapProfileRecords:
   - GUID:            0xdeadbeef12345678
     AllocSites:

diff  --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 885e06df6c390..8660eed6be2bf 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Debuginfod/HTTPClient.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Object/Binary.h"
+#include "llvm/ProfileData/DataAccessProf.h"
 #include "llvm/ProfileData/InstrProfCorrelator.h"
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ProfileData/InstrProfWriter.h"
@@ -756,6 +757,8 @@ loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
 
     auto MemProfData = Reader->takeMemProfData();
 
+    auto DataAccessProfData = Reader->takeDataAccessProfData();
+
     // Check for the empty input in case the YAML file is invalid.
     if (MemProfData.Records.empty()) {
       WC->Errors.emplace_back(
@@ -764,6 +767,7 @@ loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
     }
 
     WC->Writer.addMemProfData(std::move(MemProfData), MemProfError);
+    WC->Writer.addDataAccessProfData(std::move(DataAccessProfData));
     return;
   }
 

diff  --git a/llvm/unittests/ProfileData/DataAccessProfTest.cpp b/llvm/unittests/ProfileData/DataAccessProfTest.cpp
index 8866c16fe292a..13af3390557d7 100644
--- a/llvm/unittests/ProfileData/DataAccessProfTest.cpp
+++ b/llvm/unittests/ProfileData/DataAccessProfTest.cpp
@@ -14,7 +14,7 @@
 #include "gtest/gtest.h"
 
 namespace llvm {
-namespace data_access_prof {
+namespace memprof {
 namespace {
 
 using ::llvm::StringRef;
@@ -177,5 +177,5 @@ TEST(MemProf, DataAccessProfile) {
   }
 }
 } // namespace
-} // namespace data_access_prof
+} // namespace memprof
 } // namespace llvm


        


More information about the llvm-branch-commits mailing list