[llvm] [memprof] Deduplicate alloc site matches (PR #142334)

via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 1 22:57:48 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-pgo

Author: Kazu Hirata (kazutakahirata)

<details>
<summary>Changes</summary>

With:

  commit 2425626d803002027cbf71c39df80cb7b56db0fb
  Author: Kazu Hirata <kazu@<!-- -->google.com>
  Date:   Sun Jun 1 08:09:58 2025 -0700

we print out a lot of duplicate alloc site matches.

This patch partially reverts the patch above.  The core idea of using
a map to deduplicate entries remains the same, but details are
different.  Specifically:

- This PR uses [FullStackId, MatchLength] as the key, where
  MatchLength is the length of an alloc site match.

- AllocMatchInfo in this PR no longer has Matched because we always
  report matches.

- AllocMatchInfo in this PR no longer has NumFramesMatched because it
  has become part of the key.

This deduplication roughly halves the number of messages printed out.


---
Full diff: https://github.com/llvm/llvm-project/pull/142334.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Instrumentation/MemProfiler.cpp (+25-7) 
- (modified) llvm/test/Transforms/PGOProfile/memprof.ll (+4-4) 


``````````diff
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index a64dfc02f6bf3..9075c2663b108 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -816,6 +816,11 @@ static bool isAllocationWithHotColdVariant(const Function *Callee,
   }
 }
 
+struct AllocMatchInfo {
+  uint64_t TotalSize = 0;
+  AllocationType AllocType = AllocationType::None;
+};
+
 DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
 memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
                             function_ref<bool(uint64_t)> IsPresentInProfile) {
@@ -994,6 +999,8 @@ static void addVPMetadata(Module &M, Instruction &I,
 static void readMemprof(Module &M, Function &F,
                         IndexedInstrProfReader *MemProfReader,
                         const TargetLibraryInfo &TLI,
+                        std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
+                            &FullStackIdToAllocMatchInfo,
                         std::set<std::vector<uint64_t>> &MatchedCallSites,
                         DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
                         OptimizationRemarkEmitter &ORE) {
@@ -1206,11 +1213,9 @@ static void readMemprof(Module &M, Function &F,
             // was requested.
             if (ClPrintMemProfMatchInfo) {
               assert(FullStackId != 0);
-              errs() << "MemProf " << getAllocTypeAttributeString(AllocType)
-                     << " context with id " << FullStackId
-                     << " has total profiled size "
-                     << AllocInfo->Info.getTotalSize() << " is matched with "
-                     << InlinedCallStack.size() << " frames\n";
+              FullStackIdToAllocMatchInfo[std::make_pair(
+                  FullStackId, InlinedCallStack.size())] = {
+                  AllocInfo->Info.getTotalSize(), AllocType};
             }
           }
         }
@@ -1325,6 +1330,12 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
   if (SalvageStaleProfile)
     UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
 
+  // Map from the stack hash of each allocation context in the function profiles
+  // to the total profiled size (bytes), allocation type, and whether we matched
+  // it to an allocation in the IR.
+  std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
+      FullStackIdToAllocMatchInfo;
+
   // Set of the matched call sites, each expressed as a sequence of an inline
   // call stack.
   std::set<std::vector<uint64_t>> MatchedCallSites;
@@ -1335,11 +1346,18 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
 
     const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
     auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-    readMemprof(M, F, MemProfReader.get(), TLI, MatchedCallSites, UndriftMaps,
-                ORE);
+    readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
+                MatchedCallSites, UndriftMaps, ORE);
   }
 
   if (ClPrintMemProfMatchInfo) {
+    for (const auto &[IdLengthPair, Info] : FullStackIdToAllocMatchInfo) {
+      auto [Id, Length] = IdLengthPair;
+      errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
+             << " context with id " << Id << " has total profiled size "
+             << Info.TotalSize << " is matched with " << Length << " frames\n";
+    }
+
     for (const auto &CallStack : MatchedCallSites) {
       errs() << "MemProf callsite match for inline call stack";
       for (uint64_t StackId : CallStack)
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index e48da36a6d97c..c69d0311e0388 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -111,13 +111,13 @@
 ; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL
 
 ; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
 ; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
 ; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
 ; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
 ; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691

``````````

</details>


https://github.com/llvm/llvm-project/pull/142334


More information about the llvm-commits mailing list