[llvm] 7536474 - [MemProf] Add matching statistics and tracing (#94814)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 7 16:26:44 PDT 2024


Author: Teresa Johnson
Date: 2024-06-07T16:26:41-07:00
New Revision: 7536474ea723f84db791ce5b6c3fc0cefbc2cceb

URL: https://github.com/llvm/llvm-project/commit/7536474ea723f84db791ce5b6c3fc0cefbc2cceb
DIFF: https://github.com/llvm/llvm-project/commit/7536474ea723f84db791ce5b6c3fc0cefbc2cceb.diff

LOG: [MemProf] Add matching statistics and tracing (#94814)

To help debug or surface matching issues, add more statistics to the
matching. Also add optional emission of each context seen in the
function profiles along with its allocation type, size in bytes, and
whether it was matched. This information is emitted along with a hash of
the full stack context, to allow deduplication across modules for
allocations within header files.

Added: 
    

Modified: 
    llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
    llvm/test/Transforms/PGOProfile/memprof.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index d70c6a7a0a152..14d0981da501d 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -149,11 +149,33 @@ static cl::opt<bool> ClMemProfMatchHotColdNew(
         "Match allocation profiles onto existing hot/cold operator new calls"),
     cl::Hidden, cl::init(false));
 
+static cl::opt<bool>
+    ClPrintMemProfMatchInfo("memprof-print-match-info",
+                            cl::desc("Print matching stats for each allocation "
+                                     "context in this module's profiles"),
+                            cl::Hidden, cl::init(false));
+
+// Instrumentation statistics
 STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
 STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
 STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
 STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
+
+// Matching statistics
 STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
+STATISTIC(NumOfMemProfMismatch,
+          "Number of functions having mismatched memory profile hash.");
+STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");
+STATISTIC(NumOfMemProfAllocContextProfiles,
+          "Number of alloc contexts in memory profile.");
+STATISTIC(NumOfMemProfCallSiteProfiles,
+          "Number of callsites in memory profile.");
+STATISTIC(NumOfMemProfMatchedAllocContexts,
+          "Number of matched memory profile alloc contexts.");
+STATISTIC(NumOfMemProfMatchedAllocs,
+          "Number of matched memory profile allocs.");
+STATISTIC(NumOfMemProfMatchedCallSites,
+          "Number of matched memory profile callsites.");
 
 namespace {
 
@@ -637,8 +659,22 @@ static uint64_t computeStackId(const memprof::Frame &Frame) {
   return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
 }
 
-static void addCallStack(CallStackTrie &AllocTrie,
-                         const AllocationInfo *AllocInfo) {
+// Helper to generate a single hash id for a given callstack, used for emitting
+// matching statistics and useful for uniquing such statistics across modules.
+static uint64_t
+computeFullStackId(const SmallVectorImpl<memprof::Frame> &CallStack) {
+  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
+      HashBuilder;
+  for (auto &F : CallStack)
+    HashBuilder.add(F.Function, F.LineOffset, F.Column);
+  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
+  uint64_t Id;
+  std::memcpy(&Id, Hash.data(), sizeof(Hash));
+  return Id;
+}
+
+static AllocationType addCallStack(CallStackTrie &AllocTrie,
+                                   const AllocationInfo *AllocInfo) {
   SmallVector<uint64_t> StackIds;
   for (const auto &StackFrame : AllocInfo->CallStack)
     StackIds.push_back(computeStackId(StackFrame));
@@ -646,6 +682,7 @@ static void addCallStack(CallStackTrie &AllocTrie,
                                 AllocInfo->Info.getAllocCount(),
                                 AllocInfo->Info.getTotalLifetime());
   AllocTrie.addCallStack(AllocType, StackIds);
+  return AllocType;
 }
 
 // Helper to compare the InlinedCallStack computed from an instruction's debug
@@ -701,9 +738,16 @@ static bool isNewWithHotColdVariant(Function *Callee,
   }
 }
 
-static void readMemprof(Module &M, Function &F,
-                        IndexedInstrProfReader *MemProfReader,
-                        const TargetLibraryInfo &TLI) {
+struct AllocMatchInfo {
+  uint64_t TotalSize = 0;
+  AllocationType AllocType = AllocationType::None;
+  bool Matched = false;
+};
+
+static void
+readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
+            const TargetLibraryInfo &TLI,
+            std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
   auto &Ctx = M.getContext();
   // Previously we used getIRPGOFuncName() here. If F is local linkage,
   // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -727,6 +771,7 @@ static void readMemprof(Module &M, Function &F,
         SkipWarning = !PGOWarnMissing;
         LLVM_DEBUG(dbgs() << "unknown function");
       } else if (Err == instrprof_error::hash_mismatch) {
+        NumOfMemProfMismatch++;
         SkipWarning =
             NoPGOWarnMismatch ||
             (NoPGOWarnMismatchComdatWeak &&
@@ -748,6 +793,8 @@ static void readMemprof(Module &M, Function &F,
     return;
   }
 
+  NumOfMemProfFunc++;
+
   // Detect if there are non-zero column numbers in the profile. If not,
   // treat all column numbers as 0 when matching (i.e. ignore any non-zero
   // columns in the IR). The profiled binary might have been built with
@@ -762,6 +809,7 @@ static void readMemprof(Module &M, Function &F,
   std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *, unsigned>>>
       LocHashToCallSites;
   for (auto &AI : MemProfRec->AllocSites) {
+    NumOfMemProfAllocContextProfiles++;
     // Associate the allocation info with the leaf frame. The later matching
     // code will match any inlined call sequences in the IR with a longer prefix
     // of call stack frames.
@@ -770,6 +818,7 @@ static void readMemprof(Module &M, Function &F,
     ProfileHasColumns |= AI.CallStack[0].Column;
   }
   for (auto &CS : MemProfRec->CallSites) {
+    NumOfMemProfCallSiteProfiles++;
     // Need to record all frames from leaf up to and including this function,
     // as any of these may or may not have been inlined at this point.
     unsigned Idx = 0;
@@ -863,13 +912,23 @@ static void readMemprof(Module &M, Function &F,
           // If we found and thus matched all frames on the call, include
           // this MIB.
           if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
-                                                 InlinedCallStack))
-            addCallStack(AllocTrie, AllocInfo);
+                                                 InlinedCallStack)) {
+            NumOfMemProfMatchedAllocContexts++;
+            auto AllocType = addCallStack(AllocTrie, AllocInfo);
+            // Record information about the allocation if match info printing
+            // was requested.
+            if (ClPrintMemProfMatchInfo) {
+              auto FullStackId = computeFullStackId(AllocInfo->CallStack);
+              FullStackIdToAllocMatchInfo[FullStackId] = {
+                  AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
+            }
+          }
         }
         // We might not have matched any to the full inlined call stack.
         // But if we did, create and attach metadata, or a function attribute if
         // all contexts have identical profiled behavior.
         if (!AllocTrie.empty()) {
+          NumOfMemProfMatchedAllocs++;
           // MemprofMDAttached will be false if a function attribute was
           // attached.
           bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
@@ -897,6 +956,7 @@ static void readMemprof(Module &M, Function &F,
         // attach call stack metadata.
         if (stackFrameIncludesInlinedCallStack(
                 *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
+          NumOfMemProfMatchedCallSites++;
           addCallsiteMetadata(I, InlinedCallStack, Ctx);
           // Only need to find one with a matching call stack and add a single
           // callsite metadata.
@@ -942,12 +1002,25 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
 
   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
 
+  // Map from the stack hash of each allocation context in the function profiles
+  // to the total profiled size (bytes), allocation type, and whether we matched
+  // it to an allocation in the IR.
+  std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;
+
   for (auto &F : M) {
     if (F.isDeclaration())
       continue;
 
     const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
-    readMemprof(M, F, MemProfReader.get(), TLI);
+    readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo);
+  }
+
+  if (ClPrintMemProfMatchInfo) {
+    for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)
+      errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)
+             << " context with id " << Id << " has total profiled size "
+             << Info.TotalSize << (Info.Matched ? " is" : " not")
+             << " matched\n";
   }
 
   return PreservedAnalyses::none();

diff  --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index 13f370a4071e8..4a87f4f9d7449 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -5,6 +5,8 @@
 ; REQUIRES: zlib
 ;; Avoid failures on big-endian systems that can't read the profile properly
 ; REQUIRES: x86_64-linux
+;; -stats requires asserts
+; REQUIRES: asserts
 
 ;; TODO: Use text profile inputs once that is available for memprof.
 ;; # To update the Inputs below, run Inputs/update_memprof_inputs.sh.
@@ -25,7 +27,7 @@
 ; ALL-NOT: no profile data available for function
 
 ;; Using a memprof-only profile for memprof-use should only give memprof metadata
-; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-print-match-info -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY,MEMPROFMATCHINFO,MEMPROFSTATS
 ; There should not be any PGO metadata
 ; MEMPROFONLY-NOT: !prof
 
@@ -61,6 +63,15 @@
 ;; give both memprof and pgo metadata.
 ; RUN: opt < %s -passes='pgo-instr-use,memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO
 
+; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched
+; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched
+; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched
+; MEMPROFMATCHINFO: MemProf cold context with id 8525406123785421946 has total profiled size 10 is matched
+; MEMPROFMATCHINFO: MemProf cold context with id 11714230664165068698 has total profiled size 10 is matched
+; MEMPROFMATCHINFO: MemProf cold context with id 15737101490731057601 has total profiled size 10 is matched
+; MEMPROFMATCHINFO: MemProf cold context with id 16342802530253093571 has total profiled size 10 is matched
+; MEMPROFMATCHINFO: MemProf cold context with id 18254812774972004394 has total profiled size 10 is matched
+
 ; ModuleID = 'memprof.cc'
 source_filename = "memprof.cc"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
@@ -346,6 +357,14 @@ for.end:                                          ; preds = %for.cond
 ; MEMPROFNOCOLINFO: ![[C10]] = !{i64 -4535090212904553409}
 ; MEMPROFNOCOLINFO: ![[C11]] = !{i64 3577763375057267810}
 
+; MEMPROFSTATS:  8 memprof - Number of alloc contexts in memory profile.
+; MEMPROFSTATS: 10 memprof - Number of callsites in memory profile.
+; MEMPROFSTATS:  6 memprof - Number of functions having valid memory profile.
+; MEMPROFSTATS:  8 memprof - Number of matched memory profile alloc contexts.
+; MEMPROFSTATS:  3 memprof - Number of matched memory profile allocs.
+; MEMPROFSTATS: 10 memprof - Number of matched memory profile callsites.
+
+
 ; Function Attrs: argmemonly nofree nounwind willreturn writeonly
 declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
 


        


More information about the llvm-commits mailing list