[llvm] c93b3a3 - [MemProf] Extend MemProfUse pass to make use of data access profiles to partition data (#151238)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 27 20:43:41 PDT 2025
Author: Mingming Liu
Date: 2025-08-27T23:43:37-04:00
New Revision: c93b3a345406f0ee6cef28b8b85a94d734b40fb0
URL: https://github.com/llvm/llvm-project/commit/c93b3a345406f0ee6cef28b8b85a94d734b40fb0
DIFF: https://github.com/llvm/llvm-project/commit/c93b3a345406f0ee6cef28b8b85a94d734b40fb0.diff
LOG: [MemProf] Extend MemProfUse pass to make use of data access profiles to partition data (#151238)
https://github.com/llvm/llvm-project/commit/f3f28323adbb9d01372d81b4c78ed94683e58757
introduces the data access profile format as a payload inside
[memprof](https://llvm.org/docs/InstrProfileFormat.html#memprof-profile-data),
and the MemProfUse pass reads the memprof payload.
This change extends the MemProfUse pass to read the data access profiles
to annotate global variables' section prefix.
1. If there are samples for a global variable, it's annotated as hot.
2. If a global variable is seen in the profiled binary file but doesn't
have access samples, it's annotated as unlikely.
Introduce an option `memprof-annotate-static-data-prefix` to flag-gate the
global-variable annotation path; it is false by default.
https://github.com/llvm/llvm-project/pull/155337 is the (WIP) draft
change to "reconcile" two sources of hotness.
Added:
llvm/test/Transforms/PGOProfile/data-access-profile.ll
Modified:
llvm/include/llvm/ProfileData/InstrProfReader.h
llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h
llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index deb5cd17d8fd9..134195059f9e6 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -729,6 +729,11 @@ class IndexedMemProfReader {
LLVM_ABI DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
getMemProfCallerCalleePairs() const;
+ // Returns a non-owning pointer to the data access profile data.
+ LLVM_ABI memprof::DataAccessProfData *getDataAccessProfileData() const {
+ return DataAccessProfileData.get();
+ }
+
// Return the entire MemProf profile.
LLVM_ABI memprof::AllMemProfData getAllMemProfData() const;
@@ -900,6 +905,12 @@ class LLVM_ABI IndexedInstrProfReader : public InstrProfReader {
return MemProfReader.getSummary();
}
+ /// Returns a non-owning pointer to the data access profile data, as
+ /// reported by MemProfReader. Will be null if unavailable (version < 4).
+ memprof::DataAccessProfData *getDataAccessProfileData() const {
+ return MemProfReader.getDataAccessProfileData();
+ }
+
Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
Error printBinaryIds(raw_ostream &OS) override;
};
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h
index 6170bf48e4695..1fbb2bcb194ef 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h
@@ -14,6 +14,7 @@
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Compiler.h"
@@ -36,6 +37,11 @@ class MemProfUsePass : public PassInfoMixin<MemProfUsePass> {
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
+ // Annotate global variables' section prefix based on data access profile,
+ // return true if any global variable is annotated and false otherwise.
+ bool
+ annotateGlobalVariables(Module &M,
+ const memprof::DataAccessProfData *DataAccessProf);
std::string MemoryProfileFileName;
IntrusiveRefCntPtr<vfs::FileSystem> FS;
};
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
index a9a0731f16d90..ecb2f2dbc552b 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/MemProfCommon.h"
@@ -75,6 +76,10 @@ static cl::opt<unsigned> MinMatchedColdBytePercent(
"memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
cl::desc("Min percent of cold bytes matched to hint allocation cold"));
+static cl::opt<bool> AnnotateStaticDataSectionPrefix(
+ "memprof-annotate-static-data-prefix", cl::init(false), cl::Hidden,
+ cl::desc("If true, annotate the static data section prefix"));
+
// Matching statistics
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfMemProfMismatch,
@@ -90,6 +95,14 @@ STATISTIC(NumOfMemProfMatchedAllocs,
"Number of matched memory profile allocs.");
STATISTIC(NumOfMemProfMatchedCallSites,
"Number of matched memory profile callsites.");
+STATISTIC(NumOfMemProfHotGlobalVars,
+ "Number of global vars annotated with 'hot' section prefix.");
+STATISTIC(NumOfMemProfColdGlobalVars,
+ "Number of global vars annotated with 'unlikely' section prefix.");
+STATISTIC(NumOfMemProfUnknownGlobalVars,
+ "Number of global vars with unknown hotness (no section prefix).");
+STATISTIC(NumOfMemProfExplicitSectionGlobalVars,
+ "Number of global vars with user-specified section (not annotated).");
static void addCallsiteMetadata(Instruction &I,
ArrayRef<uint64_t> InlinedCallStack,
@@ -674,11 +687,12 @@ MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
}
PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
- // Return immediately if the module doesn't contain any function.
- if (M.empty())
+ // Return immediately if the module doesn't contain any function or global
+ // variables.
+ if (M.empty() && M.globals().empty())
return PreservedAnalyses::all();
- LLVM_DEBUG(dbgs() << "Read in memory profile:");
+ LLVM_DEBUG(dbgs() << "Read in memory profile:\n");
auto &Ctx = M.getContext();
auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
if (Error E = ReaderOrErr.takeError()) {
@@ -703,6 +717,14 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::all();
}
+ const bool Changed =
+ annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData());
+
+ // If the module doesn't contain any function, return after we process all
+ // global variables.
+ if (M.empty())
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
@@ -752,3 +774,95 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::none();
}
+
+// Returns true iff the global variable has a custom section, given either by
+// __attribute__((section("name")))
+// (https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate)
+// or by #pragma clang section directives
+// (https://clang.llvm.org/docs/LanguageExtensions.html#specifying-section-names-for-global-objects-pragma-clang-section).
+static bool hasExplicitSectionName(const GlobalVariable &GVar) {
+ if (GVar.hasSection()) // A section is set directly on the global.
+ return true;
+
+ auto Attrs = GVar.getAttributes();
+ if (Attrs.hasAttribute("bss-section") || Attrs.hasAttribute("data-section") ||
+ Attrs.hasAttribute("relro-section") ||
+ Attrs.hasAttribute("rodata-section")) // Any per-kind section attribute.
+ return true;
+ return false;
+}
+
+bool MemProfUsePass::annotateGlobalVariables(
+ Module &M, const memprof::DataAccessProfData *DataAccessProf) {
+ if (!AnnotateStaticDataSectionPrefix || M.globals().empty()) // Opt-in only.
+ return false;
+
+ if (!DataAccessProf) {
+ M.getContext().diagnose(DiagnosticInfoPGOProfile(
+ MemoryProfileFileName.data(),
+ StringRef("Data access profiles not found in memprof. Ignore "
+ "-memprof-annotate-static-data-prefix."),
+ DS_Warning));
+ return false;
+ }
+
+ bool Changed = false;
+ // Iterate all global variables in the module and annotate them based on
+ // data access profiles. Note it's up to the linker to decide how to map input
+ // sections to output sections, and one conservative practice is to map
+ // unlikely-prefixed ones to the unlikely output section, and map the rest
+ // (hot-prefixed or prefix-less) to the canonical output section.
+ for (GlobalVariable &GVar : M.globals()) {
+ assert(!GVar.getSectionPrefix().has_value() &&
+ "GVar shouldn't have section prefix yet");
+ if (GVar.isDeclarationForLinker())
+ continue;
+
+ if (hasExplicitSectionName(GVar)) {
+ ++NumOfMemProfExplicitSectionGlobalVars;
+ LLVM_DEBUG(dbgs() << "Global variable " << GVar.getName()
+ << " has explicit section name. Skip annotating.\n");
+ continue;
+ }
+
+ StringRef Name = GVar.getName();
+ // Skip string literals (names starting with ".str") as their mangled names
+ // don't stay stable across binary releases.
+ // TODO: Track string content hash in the profiles and compute it inside the
+ // compiler to categorize the hotness of string literals.
+ if (Name.starts_with(".str")) {
+
+ LLVM_DEBUG(dbgs() << "Skip annotating string literal " << Name << "\n");
+ continue;
+ }
+
+ // DataAccessProfData's get* methods will canonicalize the name under the
+ // hood before looking it up, so the optimizer doesn't need to do it.
+ std::optional<DataAccessProfRecord> Record =
+ DataAccessProf->getProfileRecord(Name);
+ // Annotate a global variable as hot if it has a non-zero sampled count, and
+ // annotate it as unlikely (cold) if it's seen in the profiled binary
+ // file but doesn't have any access sample.
+ // For logging, optimization remark emitter requires a llvm::Function, but
+ // it's not well defined how to associate a global variable with a function.
+ // So we just print out the static data section prefix in LLVM_DEBUG.
+ if (Record && Record->AccessCount > 0) {
+ ++NumOfMemProfHotGlobalVars;
+ GVar.setSectionPrefix("hot");
+ Changed = true;
+ LLVM_DEBUG(dbgs() << "Global variable " << Name
+ << " is annotated as hot\n");
+ } else if (DataAccessProf->isKnownColdSymbol(Name)) {
+ ++NumOfMemProfColdGlobalVars;
+ GVar.setSectionPrefix("unlikely");
+ Changed = true;
+ LLVM_DEBUG(dbgs() << "Global variable " << Name
+ << " is annotated as unlikely\n");
+ } else {
+ ++NumOfMemProfUnknownGlobalVars;
+ LLVM_DEBUG(dbgs() << "Global variable " << Name << " is not annotated\n");
+ }
+ }
+
+ return Changed;
+}
diff --git a/llvm/test/Transforms/PGOProfile/data-access-profile.ll b/llvm/test/Transforms/PGOProfile/data-access-profile.ll
new file mode 100644
index 0000000000000..29198f34ccbba
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/data-access-profile.ll
@@ -0,0 +1,112 @@
+; REQUIRES: asserts
+; asserts are required for -debug-only=<pass-name>
+
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;; Read a text profile and merge it into indexed profile.
+; RUN: llvm-profdata merge --memprof-version=4 memprof.yaml -o memprof.profdata
+
+;; Run optimizer pass on an IR module without IR functions, and test that global
+;; variables in the module are still annotated (i.e., no early return).
+; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \
+; RUN: -debug-only=memprof -stats -S funcless-module.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT
+
+;; Run optimizer pass on the IR, and check the section prefix.
+; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \
+; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT
+
+;; Run optimizer pass without explicitly setting -memprof-annotate-static-data-prefix.
+;; The output text IR shouldn't have `section_prefix`
+; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' \
+; RUN: -debug-only=memprof -stats -S input.ll -o - | FileCheck %s --implicit-check-not="section_prefix"
+
+; LOG: Skip annotating string literal .str
+; LOG: Global variable var1 is annotated as hot
+; LOG: Global variable var2.llvm.125 is annotated as hot
+; LOG: Global variable bar is not annotated
+; LOG: Global variable foo is annotated as unlikely
+; LOG: Global variable var3 has explicit section name. Skip annotating.
+; LOG: Global variable var4 has explicit section name. Skip annotating.
+
+;; String literals are not annotated.
+; PREFIX: @.str = unnamed_addr constant [5 x i8] c"abcde"
+; PREFIX-NOT: section_prefix
+; PREFIX: @var1 = global i32 123, !section_prefix !0
+
+;; @var2.llvm.125 will be canonicalized to @var2 for profile look-up.
+; PREFIX-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0
+
+;; @bar is in neither the sampled (hot) symbol set nor the known-cold symbol
+;; set, so it won't get a section prefix. Test this by testing that there is no
+;; section_prefix between @bar and @foo.
+; PREFIX-NEXT: @bar = global i16 3
+; PREFIX-NOT: !section_prefix
+
+;; @foo is unlikely.
+; PREFIX-NEXT: @foo = global i8 2, !section_prefix !1
+
+; PREFIX-NEXT: @var3 = constant [2 x i32] [i32 12345, i32 6789], section "sec1"
+; PREFIX-NEXT: @var4 = constant [1 x i64] [i64 98765] #0
+
+; PREFIX: attributes #0 = { "rodata-section"="sec2" }
+
+; PREFIX: !0 = !{!"section_prefix", !"hot"}
+; PREFIX-NEXT: !1 = !{!"section_prefix", !"unlikely"}
+
+; STAT: 1 memprof - Number of global vars annotated with 'unlikely' section prefix.
+; STAT: 2 memprof - Number of global vars with user-specified section (not annotated).
+; STAT: 2 memprof - Number of global vars annotated with 'hot' section prefix.
+; STAT: 1 memprof - Number of global vars with unknown hotness (no section prefix).
+
+;--- memprof.yaml
+---
+DataAccessProfiles:
+ SampledRecords:
+ - Symbol: var1
+ AccessCount: 1000
+ - Symbol: var2
+ AccessCount: 5
+ - Hash: 101010
+ AccessCount: 145
+ KnownColdSymbols:
+ - foo
+ KnownColdStrHashes: [ 999, 1001 ]
+...
+;--- input.ll
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = unnamed_addr constant [5 x i8] c"abcde"
+@var1 = global i32 123
+@var2.llvm.125 = global i64 0
+@bar = global i16 3
+@foo = global i8 2
+@var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1"
+@var4 = constant [1 x i64][i64 98765] #0
+
+define i32 @func() {
+ %a = load i32, ptr @var1
+ %b = load i32, ptr @var2.llvm.125
+ %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b)
+ ret i32 %ret
+}
+
+declare i32 @func_taking_arbitrary_param(...)
+
+attributes #0 = { "rodata-section"="sec2" }
+
+;--- funcless-module.ll
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = unnamed_addr constant [5 x i8] c"abcde"
+@var1 = global i32 123
+@var2.llvm.125 = global i64 0
+@bar = global i16 3
+@foo = global i8 2
+@var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1"
+@var4 = constant [1 x i64][i64 98765] #0
+
+attributes #0 = { "rodata-section"="sec2" }
More information about the llvm-commits
mailing list