[llvm] [MemProf] Add option to emit full call context for matched allocations (PR #170516)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 3 09:35:23 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-pgo
Author: Teresa Johnson (teresajohnson)
<details>
<summary>Changes</summary>
Add the -memprof-print-matched-alloc-stack option to enable emitting the
full allocation call context (of stack ids) for each matched allocation
reported by -memprof-print-match-info. The option is a no-op when the latter
is not enabled.
---
Full diff: https://github.com/llvm/llvm-project/pull/170516.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Instrumentation/MemProfUse.cpp (+64-28)
- (modified) llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll (+13-4)
``````````diff
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
index b72d41a748857..bda1d4555af2d 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
@@ -62,6 +62,12 @@ static cl::opt<bool>
"context in this module's profiles"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> PrintMatchedAllocStack(
+ "memprof-print-matched-alloc-stack",
+ cl::desc("Print full stack context for matched "
+ "allocations with -memprof-print-match-info."),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool>
SalvageStaleProfile("memprof-salvage-stale-profile",
cl::desc("Salvage stale MemProf profile"),
@@ -222,9 +228,26 @@ static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar,
<< Reason << ".\n");
}
+// Structure for tracking info about matched allocation contexts for use with
+// -memprof-print-match-info and -memprof-print-matched-alloc-stack.
struct AllocMatchInfo {
+ // Total size in bytes of matched context.
uint64_t TotalSize = 0;
+ // Matched allocation's type.
AllocationType AllocType = AllocationType::None;
+ // Number of frames matched to the allocation itself (values will be >1 in
+ // cases where allocation was already inlined). Use a set because there can
+ // be multiple inlined instances and each may have a different inline depth.
+ // Use std::set to iterate in sorted order when printing.
+ std::set<unsigned> MatchedFramesSet;
+ // The full call stack of the allocation, for cases where requested via
+ // -memprof-print-matched-alloc-stack.
+ std::vector<Frame> CallStack;
+
+ // Caller responsible for inserting the matched frames and the call stack when
+ // appropriate.
+ AllocMatchInfo(uint64_t TotalSize, AllocationType AllocType)
+ : TotalSize(TotalSize), AllocType(AllocType) {}
};
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
@@ -402,13 +425,11 @@ static void addVPMetadata(Module &M, Instruction &I,
}
}
-static void
-handleAllocSite(Instruction &I, CallBase *CI,
- ArrayRef<uint64_t> InlinedCallStack, LLVMContext &Ctx,
- OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
- const std::set<const AllocationInfo *> &AllocInfoSet,
- std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
- &FullStackIdToAllocMatchInfo) {
+static void handleAllocSite(
+ Instruction &I, CallBase *CI, ArrayRef<uint64_t> InlinedCallStack,
+ LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
+ const std::set<const AllocationInfo *> &AllocInfoSet,
+ std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
// TODO: Remove this once the profile creation logic deduplicates contexts
// that are the same other than the IsInlineFrame bool. Until then, keep the
// largest.
@@ -450,9 +471,15 @@ handleAllocSite(Instruction &I, CallBase *CI,
// was requested.
if (ClPrintMemProfMatchInfo) {
assert(FullStackId != 0);
- FullStackIdToAllocMatchInfo[std::make_pair(FullStackId,
- InlinedCallStack.size())] = {
- AllocInfo->Info.getTotalSize(), AllocType};
+ auto [Iter, Inserted] = FullStackIdToAllocMatchInfo.try_emplace(
+ FullStackId,
+ AllocMatchInfo(AllocInfo->Info.getTotalSize(), AllocType));
+ // Always insert the new matched frame count, since it may differ.
+ Iter->second.MatchedFramesSet.insert(InlinedCallStack.size());
+ if (Inserted && PrintMatchedAllocStack)
+ Iter->second.CallStack.insert(Iter->second.CallStack.begin(),
+ AllocInfo->CallStack.begin(),
+ AllocInfo->CallStack.end());
}
}
}
@@ -544,14 +571,13 @@ static void handleCallSite(
}
}
-static void readMemprof(Module &M, Function &F,
- IndexedInstrProfReader *MemProfReader,
- const TargetLibraryInfo &TLI,
- std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
- &FullStackIdToAllocMatchInfo,
- std::set<std::vector<uint64_t>> &MatchedCallSites,
- DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
- OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
+static void
+readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
+ const TargetLibraryInfo &TLI,
+ std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
+ std::set<std::vector<uint64_t>> &MatchedCallSites,
+ DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
+ OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
auto &Ctx = M.getContext();
// Previously we used getIRPGOFuncName() here. If F is local linkage,
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -777,11 +803,11 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
if (SalvageStaleProfile)
UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
- // Map from the stack hash and matched frame count of each allocation context
- // in the function profiles to the total profiled size (bytes) and allocation
- // type.
- std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
- FullStackIdToAllocMatchInfo;
+ // Map from the stack hash of each matched allocation context in the function
+ // profiles to match info such as the total profiled size (bytes), allocation
+ // type, number of frames matched to the allocation itself, and the full array
+ // of call stack ids.
+ std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
// Set of the matched call sites, each expressed as a sequence of an inline
// call stack.
@@ -802,11 +828,21 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
}
if (ClPrintMemProfMatchInfo) {
- for (const auto &[IdLengthPair, Info] : FullStackIdToAllocMatchInfo) {
- auto [Id, Length] = IdLengthPair;
- errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
- << " context with id " << Id << " has total profiled size "
- << Info.TotalSize << " is matched with " << Length << " frames\n";
+ for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo) {
+ for (auto Frames : Info.MatchedFramesSet) {
+ // TODO: To reduce verbosity, should we change the existing message
+ // so that we emit a list of matched frame counts in a single message
+ // about the context (instead of one message per frame count)?
+ errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
+ << " context with id " << Id << " has total profiled size "
+ << Info.TotalSize << " is matched with " << Frames << " frames";
+ if (PrintMatchedAllocStack) {
+ errs() << " and call stack";
+ for (auto &F : Info.CallStack)
+ errs() << " " << computeStackId(F);
+ }
+ errs() << "\n";
+ }
}
for (const auto &CallStack : MatchedCallSites) {
diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
index 2dcaa9d492869..2eec875d16488 100644
--- a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
@@ -26,7 +26,13 @@
; REQUIRES: x86_64-linux
; RUN: split-file %s %t
; RUN: llvm-profdata merge %t/memprof-dump-matched-alloc-site.yaml -o %t/memprof-dump-matched-alloc-site.memprofdata
-; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -S 2>&1 | FileCheck %s
+; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -S 2>&1 | FileCheck %s --check-prefix=MATCH
+;; Test that -memprof-print-matched-alloc-stack enables reporting of the full
+;; matched stack.
+; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -memprof-print-matched-alloc-stack -S 2>&1 | FileCheck %s --check-prefixes=MATCH,STACK
+;; Test that -memprof-print-matched-alloc-stack without -memprof-print-match-info
+;; is a noop.
+; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-matched-alloc-stack -S 2>&1 | FileCheck %s --implicit-check-not="context with id" --implicit-check-not="and call stack"
;--- memprof-dump-matched-alloc-site.yaml
---
@@ -77,9 +83,12 @@ HeapProfileRecords:
# Kept empty here because this section is irrelevant for this test.
...
;--- memprof-dump-matched-alloc-site.ll
-; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 1 frames
-; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 2 frames
-; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 3 frames
+; MATCH: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 1 frames
+; STACK-SAME: and call stack 16675831946704128299 1244320836757332728 8373967866436022208 5401059281181789382
+; MATCH: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 2 frames
+; STACK-SAME: and call stack 16675831946704128299 1244320836757332728 8373967866436022208 5401059281181789382
+; MATCH: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 3 frames
+; STACK-SAME: and call stack 16675831946704128299 1244320836757332728 8373967866436022208 5401059281181789382
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
``````````
</details>
https://github.com/llvm/llvm-project/pull/170516
More information about the llvm-commits
mailing list