[llvm] [MemProf] Add remarks for matched allocs and calls (PR #170379)

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 5 15:26:33 PST 2025


https://github.com/teresajohnson updated https://github.com/llvm/llvm-project/pull/170379

>From e5918ff1c5875aa8ed71f34f712802070985568f Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Tue, 2 Dec 2025 14:06:57 -0800
Subject: [PATCH 1/2] [MemProf] Add remarks for matched allocs and calls

The new remarks are somewhat equivalent to the -memprof-print-match-info
messages, however, are not deduplicated across the module so will be
generally more verbose.
---
 .../Transforms/Instrumentation/MemProfUse.cpp | 19 +++++++++++++++++--
 .../memprof-dump-matched-alloc-site.ll        |  9 ++++++++-
 .../memprof-dump-matched-call-sites.ll        |  8 +++++++-
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
index b72d41a748857..bc4d331862dc5 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
@@ -454,6 +454,15 @@ handleAllocSite(Instruction &I, CallBase *CI,
                                                    InlinedCallStack.size())] = {
             AllocInfo->Info.getTotalSize(), AllocType};
       }
+      ORE.emit(
+          OptimizationRemark(DEBUG_TYPE, "MemprofContext", CI)
+          << ore::NV("AllocationCall", CI) << " in function "
+          << ore::NV("Caller", CI->getFunction())
+          << " matched alloc context with alloc type "
+          << ore::NV("Attribute", getAllocTypeAttributeString(AllocType))
+          << " total size " << ore::NV("Size", AllocInfo->Info.getTotalSize())
+          << " full context id " << ore::NV("Context", FullStackId)
+          << " frame count " << ore::NV("Frames", InlinedCallStack.size()));
     }
   }
   // If the threshold for the percent of cold bytes is less than 100%,
@@ -516,7 +525,8 @@ static void handleCallSite(
     Instruction &I, const Function *CalledFunction,
     ArrayRef<uint64_t> InlinedCallStack,
     const std::unordered_set<CallSiteEntry, CallSiteEntryHash> &CallSiteEntries,
-    Module &M, std::set<std::vector<uint64_t>> &MatchedCallSites) {
+    Module &M, std::set<std::vector<uint64_t>> &MatchedCallSites,
+    OptimizationRemarkEmitter &ORE) {
   auto &Ctx = M.getContext();
   for (const auto &CallSiteEntry : CallSiteEntries) {
     // If we found and thus matched all frames on the call, create and
@@ -539,6 +549,11 @@ static void handleCallSite(
         append_range(CallStack, InlinedCallStack);
         MatchedCallSites.insert(std::move(CallStack));
       }
+      ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofContext", &I)
+               << ore::NV("CallSite", &I) << " in function "
+               << ore::NV("Caller", I.getFunction())
+               << " matched callsite with frame count "
+               << ore::NV("Frames", InlinedCallStack.size()));
       break;
     }
   }
@@ -719,7 +734,7 @@ static void readMemprof(Module &M, Function &F,
         // instruction's leaf location in the callsites map and not the
         // allocation map.
         handleCallSite(I, CalledFunction, InlinedCallStack,
-                       CallSitesIter->second, M, MatchedCallSites);
+                       CallSitesIter->second, M, MatchedCallSites, ORE);
     }
   }
 }
diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
index 2dcaa9d492869..10cafff3490b3 100644
--- a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
@@ -26,7 +26,7 @@
 ; REQUIRES: x86_64-linux
 ; RUN: split-file %s %t
 ; RUN: llvm-profdata merge %t/memprof-dump-matched-alloc-site.yaml -o %t/memprof-dump-matched-alloc-site.memprofdata
-; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -S 2>&1 | FileCheck %s
+; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -S -pass-remarks=memprof 2>&1 | FileCheck %s
 
 ;--- memprof-dump-matched-alloc-site.yaml
 ---
@@ -77,6 +77,13 @@ HeapProfileRecords:
       # Kept empty here because this section is irrelevant for this test.
 ...
 ;--- memprof-dump-matched-alloc-site.ll
+
+;; From -pass-remarks=memprof
+; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f1v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 1
+; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f2v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 2
+; CHECK: remark: memprof-dump-matched-alloc-site.cc:1:21: call in function _Z2f3v matched alloc context with alloc type notcold total size 3 full context id 5736731103568718490 frame count 3
+
+;; From -memprof-print-match-info
 ; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 1 frames
 ; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 2 frames
 ; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 3 frames
diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
index fa99116b820f9..541ac7b561e8b 100644
--- a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
@@ -34,7 +34,7 @@
 ; REQUIRES: x86_64-linux
 ; RUN: split-file %s %t
 ; RUN: llvm-profdata merge %t/memprof-dump-matched-call-site.yaml -o %t/memprof-dump-matched-call-site.memprofdata
-; RUN: opt < %t/memprof-dump-matched-call-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-call-site.memprofdata>' -memprof-print-match-info -S 2>&1 | FileCheck %s
+; RUN: opt < %t/memprof-dump-matched-call-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-call-site.memprofdata>' -memprof-print-match-info -S -pass-remarks=memprof 2>&1 | FileCheck %s
 
 ;--- memprof-dump-matched-call-site.yaml
 ---
@@ -71,6 +71,12 @@ HeapProfileRecords:
     CallSites:       []
 ...
 ;--- memprof-dump-matched-call-site.ll
+
+;; From -pass-remarks=memprof
+; CHECK: remark: match.cc:18:3: call in function main matched callsite with frame count 1
+; CHECK: remark: match.cc:13:28: call in function _ZL2f1v matched callsite with frame count 2
+
+;; From -memprof-print-match-info
 ; CHECK: MemProf notcold context with id 3894143216621363392 has total profiled size 4 is matched with 1 frames
 ; CHECK: MemProf callsite match for inline call stack 4745611964195289084 10616861955219347331
 ; CHECK: MemProf callsite match for inline call stack 5401059281181789382

>From 8a7883d12139a535ce8c88cdd8a1da4907b30927 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Fri, 5 Dec 2025 15:20:49 -0800
Subject: [PATCH 2/2] Update remark name to pass name

---
 llvm/lib/Transforms/Instrumentation/MemProfUse.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
index bc4d331862dc5..31e69784262da 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
@@ -455,7 +455,7 @@ handleAllocSite(Instruction &I, CallBase *CI,
             AllocInfo->Info.getTotalSize(), AllocType};
       }
       ORE.emit(
-          OptimizationRemark(DEBUG_TYPE, "MemprofContext", CI)
+          OptimizationRemark(DEBUG_TYPE, "MemProfUse", CI)
           << ore::NV("AllocationCall", CI) << " in function "
           << ore::NV("Caller", CI->getFunction())
           << " matched alloc context with alloc type "
@@ -549,7 +549,7 @@ static void handleCallSite(
         append_range(CallStack, InlinedCallStack);
         MatchedCallSites.insert(std::move(CallStack));
       }
-      ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofContext", &I)
+      ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemProfUse", &I)
                << ore::NV("CallSite", &I) << " in function "
                << ore::NV("Caller", I.getFunction())
                << " matched callsite with frame count "



More information about the llvm-commits mailing list