[llvm] cb96a3d - [memprof] Dump the number of matched frames (#137082)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 23 21:29:19 PDT 2025


Author: Kazu Hirata
Date: 2025-04-23T21:29:16-07:00
New Revision: cb96a3dc07b0a26023633ae91fab9b4213730236

URL: https://github.com/llvm/llvm-project/commit/cb96a3dc07b0a26023633ae91fab9b4213730236
DIFF: https://github.com/llvm/llvm-project/commit/cb96a3dc07b0a26023633ae91fab9b4213730236.diff

LOG: [memprof] Dump the number of matched frames (#137082)

This patch teaches readMemprof to dump the number of frames for each
allocation site match.  This information helps us analyze what part of
the call stack in the MemProf profile has matched the IR.

Aside from updating existing test cases, this patch adds one more test
case, memprof-dump-matched-alloc-site.ll, because none of the existing
test cases has the number of frames greater than one.

Added: 
    llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll

Modified: 
    llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
    llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
    llvm/test/Transforms/PGOProfile/memprof.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 46b524d054493..67eba057c1791 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -818,6 +818,7 @@ static bool isAllocationWithHotColdVariant(const Function *Callee,
 
 struct AllocMatchInfo {
   uint64_t TotalSize = 0;
+  size_t NumFramesMatched = 0;
   AllocationType AllocType = AllocationType::None;
   bool Matched = false;
 };
@@ -1152,7 +1153,8 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
             if (ClPrintMemProfMatchInfo) {
               assert(FullStackId != 0);
               FullStackIdToAllocMatchInfo[FullStackId] = {
-                  AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
+                  AllocInfo->Info.getTotalSize(), InlinedCallStack.size(),
+                  AllocType, /*Matched=*/true};
             }
           }
         }
@@ -1285,7 +1287,7 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
       errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
              << " context with id " << Id << " has total profiled size "
              << Info.TotalSize << (Info.Matched ? " is" : " not")
-             << " matched\n";
+             << " matched with " << Info.NumFramesMatched << " frames\n";
 
     for (const auto &CallStack : MatchedCallSites) {
       errs() << "MemProf callsite match for inline call stack";

diff  --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
new file mode 100644
index 0000000000000..b9126ac9a457f
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
@@ -0,0 +1,78 @@
+; Tests that the compiler dumps an allocation site with multiple inlined frames.
+;
+; The test case is generated from:
+;
+; // main
+; // |
+; // f1 (noinline)
+; // |
+; // f2
+; // |
+; // f3
+; // |
+; // new
+;
+; char *f1() { return new char[3]; }
+; char *f2() { return f1(); }
+; __attribute__((noinline)) char *f3() { return f2(); }
+;
+; int main() {
+;   f3();
+;   return 0;
+; }
+;
+; Here we expect to match the allocation site to encompass 3 frames.
+
+; REQUIRES: x86_64-linux
+; RUN: split-file %s %t
+; RUN: llvm-profdata merge %t/memprof-dump-matched-alloc-site.yaml -o %t/memprof-dump-matched-alloc-site.memprofdata
+; RUN: opt < %t/memprof-dump-matched-alloc-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-alloc-site.memprofdata>' -memprof-print-match-info -S 2>&1 | FileCheck %s
+
+;--- memprof-dump-matched-alloc-site.yaml
+---
+HeapProfileRecords:
+  - GUID:            _Z2f3v
+    AllocSites:
+      - Callstack:
+          - { Function: _ZL2f1v, LineOffset: 0, Column: 35, IsInlineFrame: true }
+          - { Function: _ZL2f2v, LineOffset: 0, Column: 35, IsInlineFrame: true }
+          - { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
+          - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
+        MemInfoBlock:
+          AllocCount:      1
+          TotalSize:       3
+          TotalLifetime:   0
+          TotalLifetimeAccessDensity: 0
+    CallSites:
+      # Kept empty here because this section is irrelevant for this test.
+...
+;--- memprof-dump-matched-alloc-site.ll
+; CHECK: MemProf notcold context with id 12978026349401156968 has total profiled size 3 is matched with 3 frames
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define ptr @_Z2f3v() {
+entry:
+  %call.i.i = call ptr @_Znam(i64 0), !dbg !3
+  ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
+!1 = !DIFile(filename: "memprof-dump-matched-alloc-site.cc", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !DILocation(line: 1, column: 35, scope: !4, inlinedAt: !7)
+!4 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1v", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!5 = !DISubroutineType(types: !6)
+!6 = !{}
+!7 = distinct !DILocation(line: 2, column: 35, scope: !8, inlinedAt: !9)
+!8 = distinct !DISubprogram(name: "f2", linkageName: "_ZL2f2v", scope: !1, file: !1, line: 2, type: !5, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!9 = distinct !DILocation(line: 3, column: 47, scope: !10)
+!10 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!11 = !DILocation(line: 6, column: 3, scope: !12)
+!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !5, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)

diff  --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
index 6fe0e5cd497ec..fa99116b820f9 100644
--- a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
@@ -71,7 +71,7 @@ HeapProfileRecords:
     CallSites:       []
 ...
 ;--- memprof-dump-matched-call-site.ll
-; CHECK: MemProf notcold context with id 3894143216621363392 has total profiled size 4 is matched
+; CHECK: MemProf notcold context with id 3894143216621363392 has total profiled size 4 is matched with 1 frames
 ; CHECK: MemProf callsite match for inline call stack 4745611964195289084 10616861955219347331
 ; CHECK: MemProf callsite match for inline call stack 5401059281181789382
 

diff  --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index 5a958de5f7f8d..73226df861ea5 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -93,14 +93,14 @@
 ;; notcold again.
 ; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL
 
-; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched
-; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched
+; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691


        


More information about the llvm-commits mailing list