[llvm] [StaticDataLayout][PGO] Add profile format for static data layout, and the classes to operate on the profiles. (PR #138170)
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Mon May 12 22:17:59 PDT 2025
================
@@ -0,0 +1,167 @@
+//===- DataAccessProf.h - Data access profile format support ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support to construct and use data access profiles.
+//
+// For the original RFC of this pass please see
+// https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PROFILEDATA_DATAACCESSPROF_H_
+#define LLVM_PROFILEDATA_DATAACCESSPROF_H_
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfoVariant.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/StringSaver.h"
+
+#include <cstdint>
+#include <variant>
+
+namespace llvm {
+
+namespace data_access_prof {
+// The location of a piece of data in the source code, expressed as a file name
+// plus a line number.
+struct DataLocation {
+ // The filename where the data is located.
+ // NOTE(review): `StringRef` does not own its characters; presumably the
+ // backing storage is kept alive by the owner of the profile data -- confirm.
+ StringRef FileName;
+ // The line number in the source code. There is no default member
+ // initializer, so the value must be set by whoever creates the object.
+ uint32_t Line;
+};
+
+// The data access profile for a single symbol: its identity, how often it was
+// accessed, and (optionally) where it is located in the source code.
+struct DataAccessProfRecord {
+ // Constructs a record from the three required fields. `Locations` is not
+ // touched here, so it starts out empty.
+ DataAccessProfRecord(uint64_t SymbolID, uint64_t AccessCount,
+ bool IsStringLiteral)
+ : SymbolID(SymbolID), AccessCount(AccessCount),
+ IsStringLiteral(IsStringLiteral) {}
+
+ // Represents a data symbol. The meaning takes one of two forms: a symbol
+ // index for the symbol name if `IsStringLiteral` is false, or the hash of the
+ // string content if `IsStringLiteral` is true. For most symbolizable static
+ // data, the mangled symbol names remain stable relative to the source code
+ // and are therefore used to identify symbols across binary releases. String
+ // literals have unstable name patterns like `.str.N[.llvm.hash]`, so the
+ // content hash is used instead. This is a required field.
+ uint64_t SymbolID;
+
+ // The access count of the symbol. Required.
+ uint64_t AccessCount;
+
+ // True iff this is a record for a string literal (symbols with name pattern
+ // `.str.*` in the symbol table). Required. It also determines how `SymbolID`
+ // must be interpreted (see the comment on `SymbolID`).
+ bool IsStringLiteral;
+
+ // The locations of the data in the source code. Optional; may be empty.
+ llvm::SmallVector<DataLocation, 0> Locations;
+};
+
+/// Encapsulates the data access profile data and the methods to operate on it.
+/// This class provides profile look-up, serialization and deserialization.
+class DataAccessProfData {
+public:
+ // SymbolID is either a string representing symbol name if the symbol has
+ // stable mangled name relative to source code, or a uint64_t representing the
+ // content hash of a string literal (with unstable name patterns like
+ // `.str.N[.llvm.hash]`). The StringRef is owned by the class's saver object.
+ using SymbolHandle = std::variant<StringRef, uint64_t>;
+ using StringToIndexMap = llvm::MapVector<StringRef, uint64_t>;
+
+ DataAccessProfData() : Saver(Allocator) {}
+
+ /// Serialize profile data to the output stream.
+ /// Storage layout:
+ /// - Serialized strings.
+ /// - The encoded hashes.
+ /// - Records.
+ Error serialize(ProfOStream &OS) const;
+
+ /// Deserialize this class from the given buffer.
+ Error deserialize(const unsigned char *&Ptr);
+
+ /// Returns a pointer to the profile record for \p SymID, or nullptr if there
+ /// isn't a record. Internally, this function will canonicalize the symbol
+ /// name before the lookup.
+ const DataAccessProfRecord *getProfileRecord(const SymbolHandle SymID) const;
----------------
mingmingl-llvm wrote:
Updated the look-up API to return a record with owned strings (i.e., of type `std::string`), while the internal records keep using referenced strings. Please take another look, thanks!
https://github.com/llvm/llvm-project/pull/138170
More information about the llvm-commits
mailing list