[llvm] [memprof] Deduplicate alloc site matches (PR #142334)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 1 22:57:48 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-pgo
Author: Kazu Hirata (kazutakahirata)
<details>
<summary>Changes</summary>
With:
commit 2425626d803002027cbf71c39df80cb7b56db0fb
Author: Kazu Hirata <kazu@<!-- -->google.com>
Date: Sun Jun 1 08:09:58 2025 -0700
we print out a lot of duplicate alloc site matches.
This patch partially reverts the patch above. The core idea of using
a map to deduplicate entries remains the same, but details are
different. Specifically:
- This PR uses the [FullStackID, MatchLength] as the key, where
MatchLength is the length of an alloc site match.
- AllocMatchInfo in this PR no longer has Matched because we always
report matches.
- AllocMatchInfo in this PR no longer has NumFramesMatched because it
has become part of the key.
This deduplication roughly halves the number of messages printed out.
---
Full diff: https://github.com/llvm/llvm-project/pull/142334.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Instrumentation/MemProfiler.cpp (+25-7)
- (modified) llvm/test/Transforms/PGOProfile/memprof.ll (+4-4)
``````````diff
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index a64dfc02f6bf3..9075c2663b108 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -816,6 +816,11 @@ static bool isAllocationWithHotColdVariant(const Function *Callee,
}
}
+struct AllocMatchInfo {
+ uint64_t TotalSize = 0;
+ AllocationType AllocType = AllocationType::None;
+};
+
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,
function_ref<bool(uint64_t)> IsPresentInProfile) {
@@ -994,6 +999,8 @@ static void addVPMetadata(Module &M, Instruction &I,
static void readMemprof(Module &M, Function &F,
IndexedInstrProfReader *MemProfReader,
const TargetLibraryInfo &TLI,
+ std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
+ &FullStackIdToAllocMatchInfo,
std::set<std::vector<uint64_t>> &MatchedCallSites,
DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
OptimizationRemarkEmitter &ORE) {
@@ -1206,11 +1213,9 @@ static void readMemprof(Module &M, Function &F,
// was requested.
if (ClPrintMemProfMatchInfo) {
assert(FullStackId != 0);
- errs() << "MemProf " << getAllocTypeAttributeString(AllocType)
- << " context with id " << FullStackId
- << " has total profiled size "
- << AllocInfo->Info.getTotalSize() << " is matched with "
- << InlinedCallStack.size() << " frames\n";
+ FullStackIdToAllocMatchInfo[std::make_pair(
+ FullStackId, InlinedCallStack.size())] = {
+ AllocInfo->Info.getTotalSize(), AllocType};
}
}
}
@@ -1325,6 +1330,12 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
if (SalvageStaleProfile)
UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
+ // Map from the [stack hash, match length] pair of each matched allocation
+ // context in the function profiles to the total profiled size (bytes) and
+ // allocation type.
+ std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>
+ FullStackIdToAllocMatchInfo;
+
// Set of the matched call sites, each expressed as a sequence of an inline
// call stack.
std::set<std::vector<uint64_t>> MatchedCallSites;
@@ -1335,11 +1346,18 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- readMemprof(M, F, MemProfReader.get(), TLI, MatchedCallSites, UndriftMaps,
- ORE);
+ readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
+ MatchedCallSites, UndriftMaps, ORE);
}
if (ClPrintMemProfMatchInfo) {
+ for (const auto &[IdLengthPair, Info] : FullStackIdToAllocMatchInfo) {
+ auto [Id, Length] = IdLengthPair;
+ errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
+ << " context with id " << Id << " has total profiled size "
+ << Info.TotalSize << " is matched with " << Length << " frames\n";
+ }
+
for (const auto &CallStack : MatchedCallSites) {
errs() << "MemProf callsite match for inline call stack";
for (uint64_t StackId : CallStack)
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index e48da36a6d97c..c69d0311e0388 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -111,13 +111,13 @@
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-min-ave-lifetime-access-density-hot-threshold=0 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL
; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
-; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched with 1 frames
+; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched with 1 frames
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 748269490701775343
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 1544787832369987002
; MEMPROFMATCHINFO: MemProf callsite match for inline call stack 2061451396820446691
``````````
</details>
https://github.com/llvm/llvm-project/pull/142334
More information about the llvm-commits
mailing list