[llvm] [MemProf] Extend MemProfUse pass to make use of data access profiles to partition data (PR #151238)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 25 08:33:16 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-pgo
Author: Mingming Liu (mingmingl-llvm)
<details>
<summary>Changes</summary>
https://github.com/llvm/llvm-project/commit/f3f28323adbb9d01372d81b4c78ed94683e58757 introduces the data access profile format as a payload inside [memprof](https://llvm.org/docs/InstrProfileFormat.html#memprof-profile-data), and the MemProfUse pass reads the memprof payload.
This change extends the MemProfUse pass to read the data access profiles to annotate global variables' section prefix.
1. If there are samples for a global variable, it's annotated as hot.
2. If a global variable is seen in the profiled binary file but doesn't have access samples, it's annotated as unlikely.
---
Full diff: https://github.com/llvm/llvm-project/pull/151238.diff
4 Files Affected:
- (modified) llvm/include/llvm/ProfileData/InstrProfReader.h (+11)
- (modified) llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h (+5)
- (modified) llvm/lib/Transforms/Instrumentation/MemProfUse.cpp (+78-2)
- (added) llvm/test/Transforms/PGOProfile/data-access-profile.ll (+87)
``````````diff
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index deb5cd17d8fd9..bccbc2006b898 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -729,6 +729,11 @@ class IndexedMemProfReader {
LLVM_ABI DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
getMemProfCallerCalleePairs() const;
+ // Returns non-owned pointer to data access profile data.
+ memprof::DataAccessProfData *getDataAccessProfileData() const {
+ return DataAccessProfileData.get();
+ }
+
// Return the entire MemProf profile.
LLVM_ABI memprof::AllMemProfData getAllMemProfData() const;
@@ -900,6 +905,12 @@ class LLVM_ABI IndexedInstrProfReader : public InstrProfReader {
return MemProfReader.getSummary();
}
+ /// Returns non-owned pointer to the data access profile data.
+ /// Will be null if unavailable (version < 4).
+ memprof::DataAccessProfData *getDataAccessProfileData() const {
+ return MemProfReader.getDataAccessProfileData();
+ }
+
Error readBinaryIds(std::vector<llvm::object::BuildID> &BinaryIds) override;
Error printBinaryIds(raw_ostream &OS) override;
};
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h
index 6170bf48e4695..c11333bf8ce5b 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfUse.h
@@ -14,6 +14,7 @@
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Compiler.h"
@@ -36,6 +37,10 @@ class MemProfUsePass : public PassInfoMixin<MemProfUsePass> {
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
+ // Annotate global variables' section prefix based on data access profile.
+ bool
+ annotateGlobalVariables(Module &M,
+ const memprof::DataAccessProfData *DataAccessProf);
std::string MemoryProfileFileName;
IntrusiveRefCntPtr<vfs::FileSystem> FS;
};
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
index a9a0731f16d90..3c1e41f8eb374 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/MemProfCommon.h"
@@ -75,6 +76,17 @@ static cl::opt<unsigned> MinMatchedColdBytePercent(
"memprof-matching-cold-threshold", cl::init(100), cl::Hidden,
cl::desc("Min percent of cold bytes matched to hint allocation cold"));
+static cl::opt<bool> AnnotationStaticDataPrefix(
+ "annotate-static-data-prefix", cl::init(false), cl::Hidden,
+ cl::desc("If true, annotate the static data section prefix"));
+
+static cl::opt<bool>
+ PrintStaticDataPrefix("print-static-data-prefix", cl::init(false),
+ cl::Hidden,
+ cl::desc("If true, print the static data section "
+ "prefix in errs(). This option is "
+ "meant for debugging."));
+
// Matching statistics
STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfMemProfMismatch,
@@ -674,8 +686,9 @@ MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
}
PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
- // Return immediately if the module doesn't contain any function.
- if (M.empty())
+ // Return immediately if the module doesn't contain any function or global
+ // variables.
+ if (M.empty() && M.globals().empty())
return PreservedAnalyses::all();
LLVM_DEBUG(dbgs() << "Read in memory profile:");
@@ -703,6 +716,14 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::all();
}
+ const bool Changed =
+ annotateGlobalVariables(M, MemProfReader->getDataAccessProfileData());
+
+ // If the module doesn't contain any function, return after we process all
+ // global variables.
+ if (M.empty())
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
@@ -752,3 +773,58 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::none();
}
+
+bool MemProfUsePass::annotateGlobalVariables(
+ Module &M, const memprof::DataAccessProfData *DataAccessProf) {
+ if (!AnnotationStaticDataPrefix || M.globals().empty() || !DataAccessProf)
+ return false;
+
+ bool Changed = false;
+ for (GlobalVariable &GVar : M.globals()) {
+ assert(!GVar.getSectionPrefix().has_value() &&
+ "GVar shouldn't have section prefix yet");
+ if (GVar.isDeclarationForLinker())
+ continue;
+
+ StringRef Name = GVar.getName();
+ // Skip string literals whose mangled names don't stay stable across
+ // binary releases.
+ // TODO: Track string content hash in the profiles and compute it inside the
+ // compiler to categorize the hotness of string literals.
+ if (Name.starts_with(".str"))
+ continue;
+
+ // DataAccessProfData's look-up methods canonicalize the variable name
+ // before performing the look-up, so the optimizer doesn't need to do it.
+ std::optional<DataAccessProfRecord> Record =
+ DataAccessProf->getProfileRecord(Name);
+ // Annotate a global variable as hot if it has a non-zero sampled count, and
+ // annotate it as unlikely (cold) if it's seen in the profiled binary
+ // file but doesn't have any access sample.
+ if (Record && Record->AccessCount > 0) {
+ GVar.setSectionPrefix("hot");
+ Changed = true;
+ } else if (DataAccessProf->isKnownColdSymbol(Name)) {
+ GVar.setSectionPrefix("unlikely");
+ Changed = true;
+ }
+ }
+
+ // Optimization remark emitter requires a llvm::Function, but it's not well
+ // defined to associate a global variable with a function. So we just print
+ // out the static data section prefix in errs().
+ if (PrintStaticDataPrefix) {
+ for (GlobalVariable &GVar : M.globals()) {
+ if (GVar.isDeclarationForLinker())
+ continue;
+ StringRef Name = GVar.getName();
+ auto SectionPrefix = GVar.getSectionPrefix();
+ if (SectionPrefix.has_value())
+ errs() << "Global variable " << Name
+ << " has section prefix: " << SectionPrefix.value() << "\n";
+ else
+ errs() << "Global variable " << Name << " has no section prefix\n";
+ }
+ }
+ return Changed;
+}
diff --git a/llvm/test/Transforms/PGOProfile/data-access-profile.ll b/llvm/test/Transforms/PGOProfile/data-access-profile.ll
new file mode 100644
index 0000000000000..91eaa934374b3
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/data-access-profile.ll
@@ -0,0 +1,87 @@
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;; Read a text profile and merge it into indexed profile.
+; RUN: llvm-profdata merge --memprof-version=4 memprof.yaml -o memprof.profdata
+
+;; Run optimizer pass on the IR, and check the section prefix.
+; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -annotate-static-data-prefix \
+; RUN: -S input.ll -o - 2>&1 | FileCheck %s
+
+;; Repeat the command line above and enable -print-static-data-prefix. Test both IR and log.
+; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -annotate-static-data-prefix \
+; RUN: -print-static-data-prefix -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,CHECK
+
+; LOG: Global variable .str has no section prefix
+; LOG: Global variable var1 has section prefix: hot
+; LOG: Global variable var2.llvm.125 has section prefix: hot
+; LOG: Global variable foo has section prefix: unlikely
+; LOG: Global variable bar has no section prefix
+
+;; String literals are not annotated.
+; CHECK: @.str = unnamed_addr constant [5 x i8] c"abcde"
+; CHECK-NOT: section_prefix
+; CHECK: @var1 = global i32 123, !section_prefix !0
+
+;; @var2.llvm.125 will be canonicalized to @var2 for profile look-up.
+; CHECK-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0
+; CHECK-NEXT: @foo = global i8 2, !section_prefix !1
+
+;; @bar is not seen in hot symbol or known symbol set, so it doesn't get
+;; a section prefix. It's up to the linker to decide how to map input sections
+;; to output, and one conservative practice is to map unlikely-prefixed ones to
+;; unlikely output section, and map the rest (hot-prefixed or prefix-less) to
+;; the canonical output section.
+; CHECK-NEXT: @bar = global i16 3
+
+; CHECK: !0 = !{!"section_prefix", !"hot"}
+; CHECK-NEXT: !1 = !{!"section_prefix", !"unlikely"}
+
+;--- memprof.yaml
+---
+HeapProfileRecords:
+ - GUID: 0xdeadbeef12345678
+ AllocSites:
+ - Callstack:
+ - { Function: 0x1111111111111111, LineOffset: 11, Column: 10, IsInlineFrame: true }
+ - { Function: 0x2222222222222222, LineOffset: 22, Column: 20, IsInlineFrame: false }
+ MemInfoBlock:
+ AllocCount: 111
+ TotalSize: 222
+ TotalLifetime: 333
+ TotalLifetimeAccessDensity: 444
+ CallSites:
+ - Frames:
+ - { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
+ - { Function: 0x6666666666666666, LineOffset: 66, Column: 60, IsInlineFrame: false }
+ CalleeGuids: [ 0x100, 0x200 ]
+DataAccessProfiles:
+ SampledRecords:
+ - Symbol: var1
+ AccessCount: 1000
+ - Symbol: var2
+ AccessCount: 5
+ - Hash: 101010
+ AccessCount: 145
+ KnownColdSymbols:
+ - foo
+ KnownColdStrHashes: [ 999, 1001 ]
+...
+;--- input.ll
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = unnamed_addr constant [5 x i8] c"abcde"
+@var1 = global i32 123
+@var2.llvm.125 = global i64 0
+@foo = global i8 2
+@bar = global i16 3
+
+define i32 @func() {
+ %a = load i32, ptr @var1
+ %b = load i32, ptr @var2.llvm.125
+ %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b)
+ ret i32 %ret
+}
+
+declare i32 @func_taking_arbitrary_param(...)
``````````
</details>
https://github.com/llvm/llvm-project/pull/151238
More information about the llvm-commits
mailing list