[llvm] [MemProf] Add option to emit full call context for matched allocations (PR #170516)
Teresa Johnson via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 11 09:19:38 PST 2025
https://github.com/teresajohnson updated https://github.com/llvm/llvm-project/pull/170516
>From 62e802a4be1b1bb38e2194a2fec639e506489c50 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Wed, 3 Dec 2025 09:06:43 -0800
Subject: [PATCH] [MemProf] Add option to emit full call context for matched
allocations
Add the -memprof-print-matched-alloc-stack option to enable emitting the
full allocation call context (of stack ids) for each matched allocation
reported by -memprof-print-match-info. Noop when the latter is not
enabled.
---
.../Transforms/Instrumentation/MemProfUse.cpp | 92 +++++++++++++------
.../memprof-dump-matched-alloc-site.ll | 30 +++---
2 files changed, 83 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
index 25953f43e1aa9..ec6c53b6a95ee 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
@@ -62,6 +62,12 @@ static cl::opt<bool>
"context in this module's profiles"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> PrintMatchedAllocStack(
+ "memprof-print-matched-alloc-stack",
+ cl::desc("Print full stack context for matched "
+ "allocations with -memprof-print-match-info."),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool>
PrintFunctionGuids("memprof-print-function-guids",
cl::desc("Print function GUIDs computed for matching"),
@@ -227,9 +233,26 @@ static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar,
<< Reason << ".\n");
}
+// Structure for tracking info about matched allocation contexts for use with
+// -memprof-print-match-info and -memprof-print-matched-alloc-stack.
struct AllocMatchInfo {
+ // Total size in bytes of matched context.
uint64_t TotalSize = 0;
+ // Matched allocation's type.
AllocationType AllocType = AllocationType::None;
+ // Number of frames matched to the allocation itself (values will be >1 in
+ // cases where allocation was already inlined). Use a set because there can
+ // be multiple inlined instances and each may have a different inline depth.
+ // Use std::set to iterate in sorted order when printing.
+ std::set<unsigned> MatchedFramesSet;
+ // The full call stack of the allocation, for cases where requested via
+ // -memprof-print-matched-alloc-stack.
+ std::vector<Frame> CallStack;
+
+ // Caller responsible for inserting the matched frames and the call stack when
+ // appropriate.
+ AllocMatchInfo(uint64_t TotalSize, AllocationType AllocType)
+ : TotalSize(TotalSize), AllocType(AllocType) {}
};
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
@@ -407,13 +430,11 @@ static void addVPMetadata(Module &M, Instruction &I,
}
}
-static void
-handleAllocSite(Instruction &I, CallBase *CI,
- ArrayRef<uint64_t> InlinedCallStack, LLVMContext &Ctx,
- OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
- const std::set<const AllocationInfo *> &AllocInfoSet,
- std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
- &FullStackIdToAllocMatchInfo) {
+static void handleAllocSite(
+ Instruction &I, CallBase *CI, ArrayRef<uint64_t> InlinedCallStack,
+ LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
+ const std::set<const AllocationInfo *> &AllocInfoSet,
+ std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
// TODO: Remove this once the profile creation logic deduplicates contexts
// that are the same other than the IsInlineFrame bool. Until then, keep the
// largest.
@@ -455,9 +476,15 @@ handleAllocSite(Instruction &I, CallBase *CI,
// was requested.
if (ClPrintMemProfMatchInfo) {
assert(FullStackId != 0);
- FullStackIdToAllocMatchInfo[std::make_pair(FullStackId,
- InlinedCallStack.size())] = {
- AllocInfo->Info.getTotalSize(), AllocType};
+ auto [Iter, Inserted] = FullStackIdToAllocMatchInfo.try_emplace(
+ FullStackId,
+ AllocMatchInfo(AllocInfo->Info.getTotalSize(), AllocType));
+ // Always insert the new matched frame count, since it may differ.
+ Iter->second.MatchedFramesSet.insert(InlinedCallStack.size());
+ if (Inserted && PrintMatchedAllocStack)
+ Iter->second.CallStack.insert(Iter->second.CallStack.begin(),
+ AllocInfo->CallStack.begin(),
+ AllocInfo->CallStack.end());
}
ORE.emit(
OptimizationRemark(DEBUG_TYPE, "MemProfUse", CI)
@@ -564,14 +591,13 @@ static void handleCallSite(Instruction &I, const Function *CalledFunction,
addVPMetadata(M, I, CalleeGuids.getArrayRef());
}
-static void readMemprof(Module &M, Function &F,
- IndexedInstrProfReader *MemProfReader,
- const TargetLibraryInfo &TLI,
- std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
- &FullStackIdToAllocMatchInfo,
- std::set<std::vector<uint64_t>> &MatchedCallSites,
- DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
- OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
+static void
+readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
+ const TargetLibraryInfo &TLI,
+ std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
+ std::set<std::vector<uint64_t>> &MatchedCallSites,
+ DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
+ OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
auto &Ctx = M.getContext();
// Previously we used getIRPGOFuncName() here. If F is local linkage,
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -799,11 +825,11 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
if (SalvageStaleProfile)
UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
- // Map from the stack hash and matched frame count of each allocation context
- // in the function profiles to the total profiled size (bytes) and allocation
- // type.
- std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
- FullStackIdToAllocMatchInfo;
+ // Map from the stack hash of each matched allocation context in the function
+ // profiles to match info such as the total profiled size (bytes), allocation
+ // type, number of frames matched to the allocation itself, and the full array
+ // of call stack ids.
+ std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
// Set of the matched call sites, each expressed as a sequence of an inline
// call stack.
@@ -824,11 +850,21 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
}
if (ClPrintMemProfMatchInfo) {
- for (const auto &[IdLengthPair, Info] : FullStackIdToAllocMatchInfo) {
- auto [Id, Length] = IdLengthPair;
- errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
- << " context with id " << Id << " has total profiled size "
- << Info.TotalSize << " is matched with " << Length << " frames\n";
+ for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo) {
+ for (auto Frames : Info.MatchedFramesSet) {
+ // TODO: To reduce verbosity, should we change the existing message
+ // so that we emit a list of matched frame counts in a single message
+ // about the context (instead of one message per frame count)?
+ errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
+ << " context with id " << Id << " has total profiled size "
+ << Info.TotalSize << " is matched with " << Frames << " frames";
+ if (PrintMatchedAllocStack) {
+ errs() << " and call stack";
+ for (auto &F : Info.CallStack)
+ errs() << " " << computeStackId(F);
+ }
+ errs() << "\n";
+ }
}
for (const auto &CallStack : MatchedCallSites) {
diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
index f628c5a8a3251..4d523ff85b503 100644
--- a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
@@ -26,7 +26,13 @@
; REQUIRES: x86_64-linux
; RUN: split-file %s %t
; RUN: llvm-profdata merge %t/memprof-dump-matched-alloc-site.yaml -o %t/memprof-dump-matched-alloc-site.memprofdata
-; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -memprof-print-function-guids -S -pass-remarks=memprof 2>&1 | FileCheck %s
+; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -memprof-print-function-guids -S -pass-remarks=memprof 2>&1 | FileCheck %s --check-prefixes=MATCH,FUNCGUID,REMARK
+;; Test that -memprof-print-matched-alloc-stack enables reporting of the full
+;; matched stack.
+; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -memprof-print-matched-alloc-stack -S 2>&1 | FileCheck %s --check-prefixes=MATCH,STACK
+;; Test that -memprof-print-matched-alloc-stack without -memprof-print-match-info
+;; is a noop.
+; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-matched-alloc-stack -S 2>&1 | FileCheck %s --implicit-check-not="context with id" --implicit-check-not="and call stack"
;--- memprof-dump-matched-alloc-site.yaml
---
@@ -79,17 +85,19 @@ HeapProfileRecords:
;--- memprof-dump-matched-alloc-site.ll
;; From -pass-remarks=memprof and -memprof-print-function-guids
-; CHECK: MemProf: Function GUID 4708092051066754107 is _Z2f1v
-; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f1v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 1
-; CHECK: MemProf: Function GUID 14255129117669598641 is _Z2f2v
-; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f2v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 2
-; CHECK: MemProf: Function GUID 2771528421763978342 is _Z2f3v
-; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f3v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 3
+; FUNCGUID: MemProf: Function GUID 4708092051066754107 is _Z2f1v
+; REMARK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f1v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 1
+; FUNCGUID: MemProf: Function GUID 14255129117669598641 is _Z2f2v
+; REMARK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f2v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 2
+; FUNCGUID: MemProf: Function GUID 2771528421763978342 is _Z2f3v
+; REMARK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f3v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 3
-;; From -memprof-print-match-info
-; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 1 frames
-; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 2 frames
-; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 3 frames
+; MATCH: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 1 frames
+; STACK-SAME: and call stack 16675831946704128299 1244320836757332728 8373967866436022208 5401059281181789382
+; MATCH: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 2 frames
+; STACK-SAME: and call stack 16675831946704128299 1244320836757332728 8373967866436022208 5401059281181789382
+; MATCH: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 3 frames
+; STACK-SAME: and call stack 16675831946704128299 1244320836757332728 8373967866436022208 5401059281181789382
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
More information about the llvm-commits
mailing list