[llvm] b7feccb - [memprof] Dump call site matching information (#125130)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 6 23:37:14 PST 2025

Author: Kazu Hirata
Date: 2025-02-06T23:37:10-08:00
New Revision: b7feccb31dba8f512c97f89cd413625016f34cf3

URL: https://github.com/llvm/llvm-project/commit/b7feccb31dba8f512c97f89cd413625016f34cf3
DIFF: https://github.com/llvm/llvm-project/commit/b7feccb31dba8f512c97f89cd413625016f34cf3.diff

LOG: [memprof] Dump call site matching information (#125130)

MemProfiler.cpp annotates the IR with the memory profile so that we
can later duplicate context. This patch dumps the entire inline call
stack for each call site match.




diff  --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 91c48338d03208..7d8bc3aa4c5895 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -970,6 +970,7 @@ static void
 readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
             const TargetLibraryInfo &TLI,
             std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
+            std::set<std::vector<uint64_t>> &MatchedCallSites,
             DenseMap<uint64_t, LocToLocMap> &UndriftMaps) {
   auto &Ctx = M.getContext();
   // Previously we used getIRPGOFuncName() here. If F is local linkage,
@@ -1210,6 +1211,13 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
           addCallsiteMetadata(I, InlinedCallStack, Ctx);
           // Only need to find one with a matching call stack and add a single
           // callsite metadata.
+          // Accumulate call site matching information upon request.
+          if (ClPrintMemProfMatchInfo) {
+            std::vector<uint64_t> CallStack;
+            append_range(CallStack, InlinedCallStack);
+            MatchedCallSites.insert(std::move(CallStack));
+          }
@@ -1266,13 +1274,17 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
   // it to an allocation in the IR.
   std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
+  // Set of the matched call sites, each expressed as a sequence of an inline
+  // call stack.
+  std::set<std::vector<uint64_t>> MatchedCallSites;
   for (auto &F : M) {
     if (F.isDeclaration())
     const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
     readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
-                UndriftMaps);
+                MatchedCallSites, UndriftMaps);
   if (ClPrintMemProfMatchInfo) {
@@ -1281,6 +1293,13 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
              << " context with id " << Id << " has total profiled size "
              << Info.TotalSize << (Info.Matched ? " is" : " not")
              << " matched\n";
+    for (const auto &CallStack : MatchedCallSites) {
+      errs() << "MemProf callsite match for inline call stack";
+      for (uint64_t StackId : CallStack)
+        errs() << " " << StackId;
+      errs() << "\n";
+    }
   return PreservedAnalyses::none();

diff  --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
new file mode 100644
index 00000000000000..a5302895d0593d
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
@@ -0,0 +1,114 @@
+; Tests that the compiler dumps call site matches upon request.
+; The test case is generated from:
+; // main
+; // |
+; // f1 (noinline)
+; // |
+; // f2
+; // |
+; // f3 (noinline)
+; // |
+; // new
+; __attribute__((noinline)) char *f3() { return ::new char[4]; }
+; static char *f2() { return f3(); }
+; __attribute__((noinline)) static char *f1() { return f2(); }
+; int main() {
+;   f1();
+;   return 0;
+; }
+; Here we expect to match two inline call stacks:
+; - [main]
+; - [f1, f2]
+; Note that f3 is considered to be an allocation site, not a call site, because
+; it directly calls new after inlining.
+; REQUIRES: x86_64-linux
+; RUN: split-file %s %t
+; RUN: llvm-profdata merge %t/memprof-dump-matched-call-site.yaml -o %t/memprof-dump-matched-call-site.memprofdata
+; RUN: opt < %t/memprof-dump-matched-call-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-call-site.memprofdata>' -memprof-print-match-info -S 2>&1 | FileCheck %s
+;--- memprof-dump-matched-call-site.yaml
+  - GUID:            main
+    AllocSites:      []
+    CallSites:
+      - - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
+  - GUID:            _ZL2f1v
+    AllocSites:      []
+    CallSites:
+      - - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
+        - { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
+  - GUID:            _ZL2f2v
+    AllocSites:      []
+    CallSites:
+      - - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
+        - { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
+  - GUID:            _Z2f3v
+    AllocSites:
+      - Callstack:
+          - { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
+          - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
+          - { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
+          - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
+        MemInfoBlock:
+          AllocCount:      1
+          TotalSize:       4
+          TotalLifetime:   0
+          TotalLifetimeAccessDensity: 0
+    CallSites:       []
+;--- memprof-dump-matched-call-site.ll
+; CHECK: MemProf notcold context with id 3894143216621363392 has total profiled size 4 is matched
+; CHECK: MemProf callsite match for inline call stack 4745611964195289084 10616861955219347331
+; CHECK: MemProf callsite match for inline call stack 5401059281181789382
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+define ptr @_Z2f3v() {
+  %call = call ptr @_Znam(i64 0), !dbg !3
+  ret ptr null
+declare ptr @_Znam(i64)
+define i32 @main() {
+  call void @_ZL2f1v(), !dbg !7
+  ret i32 0
+define void @_ZL2f1v() {
+  %call.i = call ptr @_Z2f3v(), !dbg !9
+  ret void
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
+!1 = !DIFile(filename: "match.cc", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !DILocation(line: 11, column: 47, scope: !4)
+!4 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 11, type: !5, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!5 = !DISubroutineType(types: !6)
+!6 = !{}
+!7 = !DILocation(line: 18, column: 3, scope: !8)
+!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 17, type: !5, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!9 = !DILocation(line: 13, column: 28, scope: !10, inlinedAt: !11)
+!10 = distinct !DISubprogram(name: "f2", linkageName: "_ZL2f2v", scope: !1, file: !1, line: 13, type: !5, scopeLine: 13, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!11 = distinct !DILocation(line: 15, column: 54, scope: !12)
+!12 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1v", scope: !1, file: !1, line: 15, type: !13, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!13 = !DISubroutineType(cc: DW_CC_nocall, types: !6)

diff  --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index f0421ba60cffca..5a958de5f7f8d5 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -101,6 +101,16 @@
 ; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched
 ; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched
 ; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2104812325165620841
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 6281715513834610934
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 8467819354083268568
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 8690657650969109624
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 9086428284934609951
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 12481870273128938184
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 12699492813229484831
 ; ModuleID = 'memprof.cc'
 source_filename = "memprof.cc"


