[llvm] b7feccb - [memprof] Dump call site matching information (#125130)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 6 23:37:14 PST 2025
Author: Kazu Hirata
Date: 2025-02-06T23:37:10-08:00
New Revision: b7feccb31dba8f512c97f89cd413625016f34cf3
URL: https://github.com/llvm/llvm-project/commit/b7feccb31dba8f512c97f89cd413625016f34cf3
DIFF: https://github.com/llvm/llvm-project/commit/b7feccb31dba8f512c97f89cd413625016f34cf3.diff
LOG: [memprof] Dump call site matching information (#125130)
MemProfiler.cpp annotates the IR with the memory profile so that we
can later duplicate context. This patch dumps the entire inline call
stack for each call site match.
Added:
llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
Modified:
llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
llvm/test/Transforms/PGOProfile/memprof.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 91c48338d03208..7d8bc3aa4c5895 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -970,6 +970,7 @@ static void
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
const TargetLibraryInfo &TLI,
std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
+ std::set<std::vector<uint64_t>> &MatchedCallSites,
DenseMap<uint64_t, LocToLocMap> &UndriftMaps) {
auto &Ctx = M.getContext();
// Previously we used getIRPGOFuncName() here. If F is local linkage,
@@ -1210,6 +1211,13 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
addCallsiteMetadata(I, InlinedCallStack, Ctx);
// Only need to find one with a matching call stack and add a single
// callsite metadata.
+
+ // Accumulate call site matching information upon request.
+ if (ClPrintMemProfMatchInfo) {
+ std::vector<uint64_t> CallStack;
+ append_range(CallStack, InlinedCallStack);
+ MatchedCallSites.insert(std::move(CallStack));
+ }
break;
}
}
@@ -1266,13 +1274,17 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
// it to an allocation in the IR.
std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
+ // Set of the matched call sites, each expressed as a sequence of an inline
+ // call stack.
+ std::set<std::vector<uint64_t>> MatchedCallSites;
+
for (auto &F : M) {
if (F.isDeclaration())
continue;
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
- UndriftMaps);
+ MatchedCallSites, UndriftMaps);
}
if (ClPrintMemProfMatchInfo) {
@@ -1281,6 +1293,13 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
<< " context with id " << Id << " has total profiled size "
<< Info.TotalSize << (Info.Matched ? " is" : " not")
<< " matched\n";
+
+ for (const auto &CallStack : MatchedCallSites) {
+ errs() << "MemProf callsite match for inline call stack";
+ for (uint64_t StackId : CallStack)
+ errs() << " " << StackId;
+ errs() << "\n";
+ }
}
return PreservedAnalyses::none();
diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
new file mode 100644
index 00000000000000..a5302895d0593d
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-call-sites.ll
@@ -0,0 +1,114 @@
+; Tests that the compiler dumps call site matches upon request.
+;
+; The test case is generated from:
+;
+; // main
+; // |
+; // f1 (noinline)
+; // |
+; // f2
+; // |
+; // f3 (noinline)
+; // |
+; // new
+;
+; __attribute__((noinline)) char *f3() { return ::new char[4]; }
+;
+; static char *f2() { return f3(); }
+;
+; __attribute__((noinline)) static char *f1() { return f2(); }
+;
+; int main() {
+; f1();
+; return 0;
+; }
+;
+; Here we expect to match two inline call stacks:
+;
+; - [main]
+; - [f1, f2]
+;
+; Note that f3 is considered to be an allocation site, not a call site, because
+; it directly calls new after inlining.
+
+; REQUIRES: x86_64-linux
+; RUN: split-file %s %t
+; RUN: llvm-profdata merge %t/memprof-dump-matched-call-site.yaml -o %t/memprof-dump-matched-call-site.memprofdata
+; RUN: opt < %t/memprof-dump-matched-call-site.ll -passes='memprof-use<profile-filename=%t/memprof-dump-matched-call-site.memprofdata>' -memprof-print-match-info -S 2>&1 | FileCheck %s
+
+;--- memprof-dump-matched-call-site.yaml
+---
+HeapProfileRecords:
+ - GUID: main
+ AllocSites: []
+ CallSites:
+ - - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
+ - GUID: _ZL2f1v
+ AllocSites: []
+ CallSites:
+ - - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
+ - { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
+ - GUID: _ZL2f2v
+ AllocSites: []
+ CallSites:
+ - - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
+ - { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
+ - GUID: _Z2f3v
+ AllocSites:
+ - Callstack:
+ - { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
+ - { Function: _ZL2f2v, LineOffset: 0, Column: 28, IsInlineFrame: true }
+ - { Function: _ZL2f1v, LineOffset: 0, Column: 54, IsInlineFrame: false }
+ - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
+ MemInfoBlock:
+ AllocCount: 1
+ TotalSize: 4
+ TotalLifetime: 0
+ TotalLifetimeAccessDensity: 0
+ CallSites: []
+...
+;--- memprof-dump-matched-call-site.ll
+; CHECK: MemProf notcold context with id 3894143216621363392 has total profiled size 4 is matched
+; CHECK: MemProf callsite match for inline call stack 4745611964195289084 10616861955219347331
+; CHECK: MemProf callsite match for inline call stack 5401059281181789382
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define ptr @_Z2f3v() {
+entry:
+ %call = call ptr @_Znam(i64 0), !dbg !3
+ ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+define i32 @main() {
+entry:
+ call void @_ZL2f1v(), !dbg !7
+ ret i32 0
+}
+
+define void @_ZL2f1v() {
+entry:
+ %call.i = call ptr @_Z2f3v(), !dbg !9
+ ret void
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
+!1 = !DIFile(filename: "match.cc", directory: "/")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !DILocation(line: 11, column: 47, scope: !4)
+!4 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 11, type: !5, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!5 = !DISubroutineType(types: !6)
+!6 = !{}
+!7 = !DILocation(line: 18, column: 3, scope: !8)
+!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 17, type: !5, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!9 = !DILocation(line: 13, column: 28, scope: !10, inlinedAt: !11)
+!10 = distinct !DISubprogram(name: "f2", linkageName: "_ZL2f2v", scope: !1, file: !1, line: 13, type: !5, scopeLine: 13, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!11 = distinct !DILocation(line: 15, column: 54, scope: !12)
+!12 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1v", scope: !1, file: !1, line: 15, type: !13, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!13 = !DISubroutineType(cc: DW_CC_nocall, types: !6)
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index f0421ba60cffca..5a958de5f7f8d5 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -101,6 +101,16 @@
; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched
; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched
; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2104812325165620841
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 6281715513834610934
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 8467819354083268568
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 8690657650969109624
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 9086428284934609951
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 12481870273128938184
+; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 12699492813229484831
; ModuleID = 'memprof.cc'
source_filename = "memprof.cc"
More information about the llvm-commits mailing list