[llvm] [memprof] Print alloc site matches immediately (PR #142233)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 30 16:05:01 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Kazu Hirata (kazutakahirata)

<details>
<summary>Changes</summary>

Without this patch, we buffer alloc site matches in
FullStackIdToAllocMatchInfo and then print them out at the end of
MemProfUsePass.

This practice is problematic when we have multiple matches per alloc
site.  Consider:

  char *f1() { return new char[3]; }
  char *f2() { return f1(); }
  __attribute__((noinline)) char *f3() { return f2(); }

In this example, f1 contains an alloc site, of course, but so do f2
and f3 via inlining.  When something like this happens,
FullStackIdToAllocMatchInfo gets updated multiple times for the same
full stack ID at:

  FullStackIdToAllocMatchInfo[FullStackId] = { ... };

with a different InlinedCallStack.size() each time, so each update
overwrites the previous one and only the last match gets printed.

This patch changes the behavior by immediately printing out alloc site
matches, potentially printing out multiple matches for the same
FullStackId.  It is up to the consumer of the messages to determine,
for example, the length of the longest match.

For the test, this patch adjusts an existing one,
memprof-dump-matched-alloc-site.ll.  Specifically, this patch
"restores" the IR and corresponding profile for f2 and f1 so that the
compiler generates a "MemProf notcold" message for each of f1, f2, and
f3.


---
Full diff: https://github.com/llvm/llvm-project/pull/142233.diff


3 Files Affected:

- (modified) llvm/lib/Transforms/Instrumentation/MemProfiler.cpp (+13-30) 
- (modified) llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll (+59-14) 
- (modified) llvm/test/Transforms/PGOProfile/memprof.ll (+4-4) 


``````````diff
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index c03aa5accc011..6bcf713603ba2 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -810,13 +810,6 @@ static bool isAllocationWithHotColdVariant(const Function *Callee,
   }
 }
 
-struct AllocMatchInfo {
-  uint64_t TotalSize = 0;
-  size_t NumFramesMatched = 0;
-  AllocationType AllocType = AllocationType::None;
-  bool Matched = false;
-};
-
 DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
 memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
                             function_ref<bool(uint64_t)> IsPresentInProfile) {
@@ -952,13 +945,12 @@ undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
     UndriftCallStack(CS.Frames);
 }
 
-static void
-readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
-            const TargetLibraryInfo &TLI,
-            std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
-            std::set<std::vector<uint64_t>> &MatchedCallSites,
-            DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
-            OptimizationRemarkEmitter &ORE) {
+static void readMemprof(Module &M, Function &F,
+                        IndexedInstrProfReader *MemProfReader,
+                        const TargetLibraryInfo &TLI,
+                        std::set<std::vector<uint64_t>> &MatchedCallSites,
+                        DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
+                        OptimizationRemarkEmitter &ORE) {
   auto &Ctx = M.getContext();
   // Previously we used getIRPGOFuncName() here. If F is local linkage,
   // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -1146,9 +1138,11 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
             // was requested.
             if (ClPrintMemProfMatchInfo) {
               assert(FullStackId != 0);
-              FullStackIdToAllocMatchInfo[FullStackId] = {
-                  AllocInfo->Info.getTotalSize(), InlinedCallStack.size(),
-                  AllocType, /*Matched=*/true};
+              errs() << "MemProf " << getAllocTypeAttributeString(AllocType)
+                     << " context with id " << FullStackId
+                     << " has total profiled size "
+                     << AllocInfo->Info.getTotalSize() << " is matched with "
+                     << InlinedCallStack.size() << " frames\n";
             }
           }
         }
@@ -1258,11 +1252,6 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
   if (SalvageStaleProfile)
     UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
 
-  // Map from the stack has of each allocation context in the function profiles
-  // to the total profiled size (bytes), allocation type, and whether we matched
-  // it to an allocation in the IR.
-  std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
-
   // Set of the matched call sites, each expressed as a sequence of an inline
   // call stack.
   std::set<std::vector<uint64_t>> MatchedCallSites;
@@ -1273,17 +1262,11 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
 
     const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
     auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-    readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
-                MatchedCallSites, UndriftMaps, ORE);
+    readMemprof(M, F, MemProfReader.get(), TLI, MatchedCallSites, UndriftMaps,
+                ORE);
   }
 
   if (ClPrintMemProfMatchInfo) {
-    for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
-      errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
-             << " context with id " << Id << " has total profiled size "
-             << Info.TotalSize << (Info.Matched ? " is" : " not")
-             << " matched with " << Info.NumFramesMatched << " frames\n";
-
     for (const auto &CallStack : MatchedCallSites) {
       errs() << "MemProf callsite match for inline call stack";
       for (uint64_t StackId : CallStack)
diff --git a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
index b9126ac9a457f..2dcaa9d492869 100644
--- a/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof-dump-matched-alloc-site.ll
@@ -31,11 +31,41 @@
 ;--- memprof-dump-matched-alloc-site.yaml
 ---
 HeapProfileRecords:
+  - GUID:            _Z2f2v
+    AllocSites:
+      - Callstack:
+          - { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true }
+          - { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true }
+          - { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
+          - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
+        MemInfoBlock:
+          AllocCount:      1
+          TotalSize:       3
+          TotalLifetime:   0
+          TotalLifetimeAccessDensity: 0
+    CallSites:
+      - Frames:
+          - { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true }
+          - { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true }
+          - { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
+  - GUID:            _Z2f1v
+    AllocSites:
+      - Callstack:
+          - { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true }
+          - { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true }
+          - { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
+          - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
+        MemInfoBlock:
+          AllocCount:      1
+          TotalSize:       3
+          TotalLifetime:   0
+          TotalLifetimeAccessDensity: 0
+    CallSites:       []
   - GUID:            _Z2f3v
     AllocSites:
       - Callstack:
-          - { Function: _ZL2f1v, LineOffset: 0, Column: 35, IsInlineFrame: true }
-          - { Function: _ZL2f2v, LineOffset: 0, Column: 35, IsInlineFrame: true }
+          - { Function: _Z2f1v, LineOffset: 0, Column: 21, IsInlineFrame: true }
+          - { Function: _Z2f2v, LineOffset: 0, Column: 21, IsInlineFrame: true }
           - { Function: _Z2f3v, LineOffset: 0, Column: 47, IsInlineFrame: false }
           - { Function: main, LineOffset: 1, Column: 3, IsInlineFrame: false }
         MemInfoBlock:
@@ -47,32 +77,47 @@ HeapProfileRecords:
       # Kept empty here because this section is irrelevant for this test.
 ...
 ;--- memprof-dump-matched-alloc-site.ll
-; CHECK: MemProf notcold context with id 12978026349401156968 has total profiled size 3 is matched with 3 frames
+; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 1 frames
+; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 2 frames
+; CHECK: MemProf notcold context with id 5736731103568718490 has total profiled size 3 is matched with 3 frames
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define ptr @_Z2f3v() {
+define ptr @_Z2f1v() {
 entry:
-  %call.i.i = call ptr @_Znam(i64 0), !dbg !3
-  ret ptr null
+  %call = call ptr @_Znam(i64 0), !dbg !3
+  ret ptr %call
 }
 
 declare ptr @_Znam(i64)
 
+define ptr @_Z2f2v() {
+entry:
+  %call.i = call ptr @_Znam(i64 0), !dbg !7
+  ret ptr %call.i
+}
+
+define ptr @_Z2f3v() {
+entry:
+  %call.i.i = call ptr @_Znam(i64 0), !dbg !10
+  ret ptr %call.i.i
+}
+
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!2}
 
 !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
 !1 = !DIFile(filename: "memprof-dump-matched-alloc-site.cc", directory: "/")
 !2 = !{i32 2, !"Debug Info Version", i32 3}
-!3 = !DILocation(line: 1, column: 35, scope: !4, inlinedAt: !7)
-!4 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1v", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!3 = !DILocation(line: 1, column: 21, scope: !4)
+!4 = distinct !DISubprogram(name: "f1", linkageName: "_Z2f1v", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
 !5 = !DISubroutineType(types: !6)
 !6 = !{}
-!7 = distinct !DILocation(line: 2, column: 35, scope: !8, inlinedAt: !9)
-!8 = distinct !DISubprogram(name: "f2", linkageName: "_ZL2f2v", scope: !1, file: !1, line: 2, type: !5, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0)
-!9 = distinct !DILocation(line: 3, column: 47, scope: !10)
-!10 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
-!11 = !DILocation(line: 6, column: 3, scope: !12)
-!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !5, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!7 = !DILocation(line: 1, column: 21, scope: !4, inlinedAt: !8)
+!8 = distinct !DILocation(line: 2, column: 21, scope: !9)
+!9 = distinct !DISubprogram(name: "f2", linkageName: "_Z2f2v", scope: !1, file: !1, line: 2, type: !5, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!10 = !DILocation(line: 1, column: 21, scope: !4, inlinedAt: !11)
+!11 = distinct !DILocation(line: 2, column: 21, scope: !9, inlinedAt: !12)
+!12 = distinct !DILocation(line: 3, column: 47, scope: !13)
+!13 = distinct !DISubprogram(name: "f3", linkageName: "_Z2f3v", scope: !1, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index c69d0311e0388..e48da36a6d97c 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -111,13 +111,13 @@
 ; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL
 
 ; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames
 ; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames
 ; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
 ; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames
 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691

``````````

</details>


https://github.com/llvm/llvm-project/pull/142233


More information about the llvm-commits mailing list