[llvm] [MemProf] Add option to emit full call context for matched allocations (PR #170516)

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 11 09:19:38 PST 2025


https://github.com/teresajohnson updated https://github.com/llvm/llvm-project/pull/170516

>From 62e802a4be1b1bb38e2194a2fec639e506489c50 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Wed, 3 Dec 2025 09:06:43 -0800
Subject: [PATCH] [MemProf] Add option to emit full call context for matched
 allocations

Add the -memprof-print-matched-alloc-stack option to enable emitting the
full allocation call context (of stack ids) for each matched allocation
reported by -memprof-print-match-info. The new option is a no-op when
-memprof-print-match-info is not enabled.
---
 .../Transforms/Instrumentation/MemProfUse.cpp | 92 +++++++++++++------
 .../memprof-dump-matched-alloc-site.ll        | 30 +++---
 2 files changed, 83 insertions(+), 39 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
index 25953f43e1aa9..ec6c53b6a95ee 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
@@ -62,6 +62,12 @@ static cl::opt<bool>
                                      "context in this module's profiles"),
                             cl::Hidden, cl::init(false));
 
+static cl::opt<bool> PrintMatchedAllocStack(
+    "memprof-print-matched-alloc-stack",
+    cl::desc("Print full stack context for matched "
+             "allocations with -memprof-print-match-info."),
+    cl::Hidden, cl::init(false));
+
 static cl::opt<bool>
     PrintFunctionGuids("memprof-print-function-guids",
                        cl::desc("Print function GUIDs computed for matching"),
@@ -227,9 +233,26 @@ static void HandleUnsupportedAnnotationKinds(GlobalVariable &GVar,
                     << Reason << ".\n");
 }
 
+// Structure for tracking info about matched allocation contexts for use with
+// -memprof-print-match-info and -memprof-print-matched-alloc-stack.
 struct AllocMatchInfo {
+  // Total size in bytes of matched context.
   uint64_t TotalSize = 0;
+  // Matched allocation's type.
   AllocationType AllocType = AllocationType::None;
+  // Number of frames matched to the allocation itself (values will be >1 in
+  // cases where allocation was already inlined). Use a set because there can
+  // be multiple inlined instances and each may have a different inline depth.
+  // Use std::set to iterate in sorted order when printing.
+  std::set<unsigned> MatchedFramesSet;
+  // The full call stack of the allocation, for cases where requested via
+  // -memprof-print-matched-alloc-stack.
+  std::vector<Frame> CallStack;
+
+  // Caller responsible for inserting the matched frames and the call stack when
+  // appropriate.
+  AllocMatchInfo(uint64_t TotalSize, AllocationType AllocType)
+      : TotalSize(TotalSize), AllocType(AllocType) {}
 };
 
 DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
@@ -407,13 +430,11 @@ static void addVPMetadata(Module &M, Instruction &I,
   }
 }
 
-static void
-handleAllocSite(Instruction &I, CallBase *CI,
-                ArrayRef<uint64_t> InlinedCallStack, LLVMContext &Ctx,
-                OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
-                const std::set<const AllocationInfo *> &AllocInfoSet,
-                std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
-                    &FullStackIdToAllocMatchInfo) {
+static void handleAllocSite(
+    Instruction &I, CallBase *CI, ArrayRef<uint64_t> InlinedCallStack,
+    LLVMContext &Ctx, OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize,
+    const std::set<const AllocationInfo *> &AllocInfoSet,
+    std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
   // TODO: Remove this once the profile creation logic deduplicates contexts
   // that are the same other than the IsInlineFrame bool. Until then, keep the
   // largest.
@@ -455,9 +476,15 @@ handleAllocSite(Instruction &I, CallBase *CI,
       // was requested.
       if (ClPrintMemProfMatchInfo) {
         assert(FullStackId != 0);
-        FullStackIdToAllocMatchInfo[std::make_pair(FullStackId,
-                                                   InlinedCallStack.size())] = {
-            AllocInfo->Info.getTotalSize(), AllocType};
+        auto [Iter, Inserted] = FullStackIdToAllocMatchInfo.try_emplace(
+            FullStackId,
+            AllocMatchInfo(AllocInfo->Info.getTotalSize(), AllocType));
+        // Always insert the new matched frame count, since it may differ.
+        Iter->second.MatchedFramesSet.insert(InlinedCallStack.size());
+        if (Inserted && PrintMatchedAllocStack)
+          Iter->second.CallStack.insert(Iter->second.CallStack.begin(),
+                                        AllocInfo->CallStack.begin(),
+                                        AllocInfo->CallStack.end());
       }
       ORE.emit(
           OptimizationRemark(DEBUG_TYPE, "MemProfUse", CI)
@@ -564,14 +591,13 @@ static void handleCallSite(Instruction &I, const Function *CalledFunction,
   addVPMetadata(M, I, CalleeGuids.getArrayRef());
 }
 
-static void readMemprof(Module &M, Function &F,
-                        IndexedInstrProfReader *MemProfReader,
-                        const TargetLibraryInfo &TLI,
-                        std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
-                            &FullStackIdToAllocMatchInfo,
-                        std::set<std::vector<uint64_t>> &MatchedCallSites,
-                        DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
-                        OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
+static void
+readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
+            const TargetLibraryInfo &TLI,
+            std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
+            std::set<std::vector<uint64_t>> &MatchedCallSites,
+            DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
+            OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {
   auto &Ctx = M.getContext();
   // Previously we used getIRPGOFuncName() here. If F is local linkage,
   // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -799,11 +825,11 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
   if (SalvageStaleProfile)
     UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
 
-  // Map from the stack hash and matched frame count of each allocation context
-  // in the function profiles to the total profiled size (bytes) and allocation
-  // type.
-  std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
-      FullStackIdToAllocMatchInfo;
+  // Map from the stack hash of each matched allocation context in the function
+  // profiles to match info such as the total profiled size (bytes), allocation
+  // type, number of frames matched to the allocation itself, and the full array
+  // of call stack ids.
+  std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
 
   // Set of the matched call sites, each expressed as a sequence of an inline
   // call stack.
@@ -824,11 +850,21 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
   }
 
   if (ClPrintMemProfMatchInfo) {
-    for (const auto &[IdLengthPair, Info] : FullStackIdToAllocMatchInfo) {
-      auto [Id, Length] = IdLengthPair;
-      errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
-             << " context with id " << Id << " has total profiled size "
-             << Info.TotalSize << " is matched with " << Length << " frames\n";
+    for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo) {
+      for (auto Frames : Info.MatchedFramesSet) {
+        // TODO: To reduce verbosity, should we change the existing message
+        // so that we emit a list of matched frame counts in a single message
+        // about the context (instead of one message per frame count)?
+        errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
+               << " context with id " << Id << " has total profiled size "
+               << Info.TotalSize << " is matched with " << Frames << " frames";
+        if (PrintMatchedAllocStack) {
+          errs() << " and call stack";
+          for (auto &F : Info.CallStack)
+            errs() << " " << computeStackId(F);
+        }
+        errs() << "\n";
+      }
     }
 
     for (const auto &CallStack : MatchedCallSites) {
diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
index f628c5a8a3251..4d523ff85b503 100644
--- a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
@@ -26,7 +26,13 @@
 ; REQUIRES: x86_64-linux
 ; RUN: split-file %s %t
 ; RUN: llvm-profdata merge %t/memprof-dump-matched-alloc-site.yaml -o %t/memprof-dump-matched-alloc-site.memprofdata
-; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -memprof-print-function-guids -S -pass-remarks=memprof 2>&1 | FileCheck %s
+; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -memprof-print-function-guids -S -pass-remarks=memprof 2>&1 | FileCheck %s --check-prefixes=MATCH,FUNCGUID,REMARK
+;; Test that -memprof-print-matched-alloc-stack enables reporting of the full
+;; matched stack.
+; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -memprof-print-matched-alloc-stack -S 2>&1 | FileCheck %s --check-prefixes=MATCH,STACK
+;; Test that -memprof-print-matched-alloc-stack without -memprof-print-match-info
+;; is a noop.
+; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-matched-alloc-stack -S 2>&1 | FileCheck %s --implicit-check-not="context with id" --implicit-check-not="and call stack"
 
 ;--- memprof-dump-matched-alloc-site.yaml
 ---
@@ -79,17 +85,19 @@ HeapProfileRecords:
 ;--- memprof-dump-matched-alloc-site.ll
 
 ;; From -pass-remarks=memprof and -memprof-print-function-guids
-; CHECK: MemProf: Function GUID 4708092051066754107 is _Z2f1v
-; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f1v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 1
-; CHECK: MemProf: Function GUID 14255129117669598641 is _Z2f2v
-; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f2v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 2
-; CHECK: MemProf: Function GUID 2771528421763978342 is _Z2f3v
-; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f3v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 3
+; FUNCGUID: MemProf: Function GUID 4708092051066754107 is _Z2f1v
+; REMARK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f1v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 1
+; FUNCGUID: MemProf: Function GUID 14255129117669598641 is _Z2f2v
+; REMARK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f2v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 2
+; FUNCGUID: MemProf: Function GUID 2771528421763978342 is _Z2f3v
+; REMARK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f3v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 3
 
-;; From -memprof-print-match-info
-; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 1 frames
-; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 2 frames
-; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 3 frames
+; MATCH: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 1 frames
+; STACK-SAME: and call stack 16675831946704128299 1244320836757332728 8373967866436022208 5401059281181789382
+; MATCH: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 2 frames
+; STACK-SAME: and call stack 16675831946704128299 1244320836757332728 8373967866436022208 5401059281181789382
+; MATCH: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 3 frames
+; STACK-SAME: and call stack 16675831946704128299 1244320836757332728 8373967866436022208 5401059281181789382
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"



More information about the llvm-commits mailing list