[llvm] [memprof] Dump the number of matched frames (PR #137082)

Kazu Hirata via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 23 19:20:33 PDT 2025


https://github.com/kazutakahirata updated https://github.com/llvm/llvm-project/pull/137082

>From a4752193dc70964bab0294aa5130794280a03f68 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Mon, 21 Apr 2025 15:02:29 -0700
Subject: [PATCH 1/2] [memprof] Dump the number of matched frames

This patch teaches readMemprof to dump the number of frames for each
allocation site match.  This information helps us analyze what part of
the call stack in the MemProf profile has matched the IR.

Aside from updating existing test cases, this patch adds one more test
case, memprof-dump-matched-alloc-site.ll, because none of the existing
test cases matches more than one frame.
---
 .../Instrumentation/MemProfiler.cpp           |  6 +-
 .../memprof-dump-matched-alloc-site.ll        | 78 +++++++++++++++++++
 .../memprof-dump-matched-call-sites.ll        |  2 +-
 llvm/test/Transforms/PGOProfile/memprof.ll    | 16 ++--
 4 files changed, 91 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 46b524d054493..afcce5e82ba8b 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -820,6 +820,7 @@ struct AllocMatchInfo {
   uint64_t TotalSize = 0;
   AllocationType AllocType = AllocationType::None;
   bool Matched = false;
+  size_t NumFramesMatched = 0;
 };
 
 DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
@@ -1152,7 +1153,8 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
             if (ClPrintMemProfMatchInfo) {
               assert(FullStackId != 0);
               FullStackIdToAllocMatchInfo[FullStackId] = {
-                  AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
+                  AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true,
+                  InlinedCallStack.size()};
             }
           }
         }
@@ -1285,7 +1287,7 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
       errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
              << " context with id " << Id << " has total profiled size "
              << Info.TotalSize << (Info.Matched ? " is" : " not")
-             << " matched\n";
+             << " matched with " << Info.NumFramesMatched << " frames\n";
 
     for (const auto &CallStack : MatchedCallSites) {
       errs() << "MemProf callsite match for inline call stack";
diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
new file mode 100644
index 0000000000000..7840f68e18d8c
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
@@ -0,0 +1,78 @@
+; Tests that the compiler dumps an allocation site with multiple inlined frames.
+;
+; The test case is generated from:
+;
+; // main
+; // |
+; // f3 (noinline)
+; // |
+; // f2
+; // |
+; // f1
+; // |
+; // new
+;
+; char *f1() { return new char[3]; }
+; char *f2() { return f1(); }
+; __attribute__((noinline)) char *f3() { return f2(); }
+;
+; int main() {
+;   f3();
+;   return 0;
+; }
+;
+; Here we expect the allocation site match to encompass 3 frames.
+
+; REQUIRES: x86_64-linux
+; RUN: split-file %s %t
+; RUN: llvm-profdata merge %t/memprof-dump-matched-alloc-site.yaml -o %t/memprof-dump-matched-alloc-site.memprofdata
+; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -S 2>&1 | FileCheck %s
+
+;--- memprof-dump-matched-alloc-site.yaml
+---
+---
+HeapProfileRecords:
+  - GUID:            _Z2f3v
+    AllocSites:
+      - Callstack:
+          - { Function: _ZL2f1v, LineOffset: 0, Column: 35, IsInlineFrame: true }
+          - { Function: _ZL2f2v, LineOffset: 0, Column: 35, IsInlineFrame: true }
+          - { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
+          - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
+        MemInfoBlock:
+          AllocCount:      1
+          TotalSize:       3
+          TotalLifetime:   0
+          TotalLifetimeAccessDensity: 0
+    CallSites:
+...
+;--- memprof-dump-matched-alloc-site.ll
+; CHECK: MemProf notcold context with id 12978026349401156968 has total profiled size 3 is matched with 3 frames
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define ptr @_Z2f3v() {
+entry:
+  %call.i.i = call ptr @_Znam(i64 0), !dbg !3
+  ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
+!1 = !DIFile(filename: "memprof-dump-matched-alloc-site.cc", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !DILocation(line: 1, column: 35, scope: !4, inlinedAt: !7)
+!4 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1v", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!5 = !DISubroutineType(types: !6)
+!6 = !{}
+!7 = distinct !DILocation(line: 2, column: 35, scope: !8, inlinedAt: !9)
+!8 = distinct !DISubprogram(name: "f2", linkageName: "_ZL2f2v", scope: !1, file: !1, line: 2, type: !5, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!9 = distinct !DILocation(line: 3, column: 47, scope: !10)
+!10 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!11 = !DILocation(line: 6, column: 3, scope: !12)
+!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !5, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
index 6fe0e5cd497ec..fa99116b820f9 100644
--- a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
@@ -71,7 +71,7 @@ HeapProfileRecords:
     CallSites:       []
 ...
 ;--- memprof-dump-matched-call-site.ll
-; CHECK: MemProf notcold context with id 3894143216621363392 has total profiled size 4 is matched
+; CHECK: MemProf notcold context with id 3894143216621363392 has total profiled size 4 is matched with 1 frames
 ; CHECK: MemProf callsite match for inline call stack 4745611964195289084 10616861955219347331
 ; CHECK: MemProf callsite match for inline call stack 5401059281181789382
 
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index 5a958de5f7f8d..73226df861ea5 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -93,14 +93,14 @@
 ;; notcold again.
 ; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL
 
-; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched
+; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691

>From f93cac902e0712df9c56c02191a8ae22cf06b17e Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu at google.com>
Date: Wed, 23 Apr 2025 17:14:01 -0700
Subject: [PATCH 2/2] Address comments.

---
 llvm/lib/Transforms/Instrumentation/MemProfiler.cpp         | 6 +++---
 .../PGOProfile/memprof-dump-matched-alloc-site.ll           | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index afcce5e82ba8b..67eba057c1791 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -818,9 +818,9 @@ static bool isAllocationWithHotColdVariant(const Function *Callee,
 
 struct AllocMatchInfo {
   uint64_t TotalSize = 0;
+  size_t NumFramesMatched = 0;
   AllocationType AllocType = AllocationType::None;
   bool Matched = false;
-  size_t NumFramesMatched = 0;
 };
 
 DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
@@ -1153,8 +1153,8 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
             if (ClPrintMemProfMatchInfo) {
               assert(FullStackId != 0);
               FullStackIdToAllocMatchInfo[FullStackId] = {
-                  AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true,
-                  InlinedCallStack.size()};
+                  AllocInfo->Info.getTotalSize(), InlinedCallStack.size(),
+                  AllocType, /*Matched=*/true};
             }
           }
         }
diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
index 7840f68e18d8c..b9126ac9a457f 100644
--- a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
@@ -30,7 +30,6 @@
 
 ;--- memprof-dump-matched-alloc-site.yaml
 ---
----
 HeapProfileRecords:
   - GUID:            _Z2f3v
     AllocSites:
@@ -45,6 +44,7 @@ HeapProfileRecords:
           TotalLifetime:   0
           TotalLifetimeAccessDensity: 0
     CallSites:
+      # Kept empty here because this section is irrelevant for this test.
 ...
 ;--- memprof-dump-matched-alloc-site.ll
 ; CHECK: MemProf notcold context with id 12978026349401156968 has total profiled size 3 is matched with 3 frames



More information about the llvm-commits mailing list