[llvm] 295a01f - [MemProf] Fix reporting with -memprof-matching-cold-threshold (#173327)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 26 12:48:58 PST 2025


Author: Teresa Johnson
Date: 2025-12-26T12:48:53-08:00
New Revision: 295a01f36c7ad0f9cacd10e6627993e06d8376e9

URL: https://github.com/llvm/llvm-project/commit/295a01f36c7ad0f9cacd10e6627993e06d8376e9
DIFF: https://github.com/llvm/llvm-project/commit/295a01f36c7ad0f9cacd10e6627993e06d8376e9.diff

LOG: [MemProf] Fix reporting with -memprof-matching-cold-threshold (#173327)

With the -memprof-matching-cold-threshold option, we hint as cold
allocations where the fraction of cold bytes is at least the given
threshold. However, we were incorrectly reporting all of the
allocation's contexts and bytes as hinted cold.

Fix this to report the non-cold contexts as ignored. To do this,
refactor out some existing reporting, and also keep track of the
original allocation type for each context in the Trie along with its
ContextTotalSize information. Most of the changes are the change to this
array's type and name.

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/MemoryProfileInfo.h
    llvm/lib/Analysis/MemoryProfileInfo.cpp
    llvm/test/Transforms/PGOProfile/memprof.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index ba4010bd8f50c..cdd8ce0c66771 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -61,6 +61,15 @@ LLVM_ABI void removeAnyExistingAmbiguousAttribute(CallBase *CB);
 /// profile but that we haven't yet been able to disambiguate.
 LLVM_ABI void addAmbiguousAttribute(CallBase *CB);
 
+// During matching we also keep the AllocationType along with the
+// ContextTotalSize in the Trie for the most accurate reporting when we decide
+// to hint unambiguously where there is a dominant type. We don't put the
+// AllocationType in the ContextTotalSize struct as it isn't needed there
+// during the LTO step, because due to context trimming a summarized
+// context with its allocation type can correspond to multiple context/size
+// pairs. Here the redundancy is a short-lived convenience.
+using ContextSizeTypePair = std::pair<ContextTotalSize, AllocationType>;
+
 /// Class to build a trie of call stack contexts for a particular profiled
 /// allocation call, along with their associated allocation types.
 /// The allocation will be at the root of the trie, which is then used to
@@ -75,8 +84,9 @@ class CallStackTrie {
     // If the user has requested reporting of hinted sizes, keep track of the
     // associated full stack id and profiled sizes. Can have more than one
     // after trimming (e.g. when building from metadata). This is only placed on
-    // the last (root-most) trie node for each allocation context.
-    std::vector<ContextTotalSize> ContextSizeInfo;
+    // the last (root-most) trie node for each allocation context. Also
+    // track the original allocation type of the context.
+    std::vector<ContextSizeTypePair> ContextInfo;
     // Map of caller stack id to the corresponding child Trie node.
     std::map<uint64_t, CallStackTrieNode *> Callers;
     CallStackTrieNode(AllocationType Type)
@@ -118,10 +128,11 @@ class CallStackTrie {
     delete Node;
   }
 
-  // Recursively build up a complete list of context size information from the
-  // trie nodes reached form the given Node, for hint size reporting.
-  void collectContextSizeInfo(CallStackTrieNode *Node,
-                              std::vector<ContextTotalSize> &ContextSizeInfo);
+  // Recursively build up a complete list of context information from the
+  // trie nodes reached form the given Node, including each context's
+  // ContextTotalSize and AllocationType, for hint size reporting.
+  void collectContextInfo(CallStackTrieNode *Node,
+                          std::vector<ContextSizeTypePair> &ContextInfo);
 
   // Recursively convert hot allocation types to notcold, since we don't
   // actually do any cloning for hot contexts, to facilitate more aggressive

diff  --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index b09f4ed78ca7e..fb22a098c60fb 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -181,7 +181,13 @@ void CallStackTrie::addCallStack(
     Curr = New;
   }
   assert(Curr);
-  llvm::append_range(Curr->ContextSizeInfo, ContextSizeInfo);
+  // Append all of the ContextSizeInfo, along with their original AllocType.
+  llvm::append_range(Curr->ContextInfo,
+                     llvm::map_range(ContextSizeInfo,
+                                     [AllocType](const ContextTotalSize &CTS) {
+                                       return ContextSizeTypePair(CTS,
+                                                                  AllocType);
+                                     }));
 }
 
 void CallStackTrie::addCallStack(MDNode *MIB) {
@@ -216,7 +222,7 @@ void CallStackTrie::addCallStack(MDNode *MIB) {
 
 static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
                              AllocationType AllocType,
-                             ArrayRef<ContextTotalSize> ContextSizeInfo,
+                             ArrayRef<ContextSizeTypePair> ContextInfo,
                              const uint64_t MaxColdSize,
                              bool BuiltFromExistingMetadata,
                              uint64_t &TotalBytes, uint64_t &ColdBytes) {
@@ -225,7 +231,7 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
   MIBPayload.push_back(
       MDString::get(Ctx, getAllocTypeAttributeString(AllocType)));
 
-  if (ContextSizeInfo.empty()) {
+  if (ContextInfo.empty()) {
     // The profile matcher should have provided context size info if there was a
     // MinCallsiteColdBytePercent < 100. Here we check >=100 to gracefully
     // handle a user-provided percent larger than 100. However, we may not have
@@ -234,7 +240,8 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
     return MDNode::get(Ctx, MIBPayload);
   }
 
-  for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) {
+  for (const auto &[CSI, AT] : ContextInfo) {
+    const auto &[FullStackId, TotalSize] = CSI;
     TotalBytes += TotalSize;
     bool LargeColdContext = false;
     if (AllocType == AllocationType::Cold) {
@@ -267,11 +274,11 @@ static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
   return MDNode::get(Ctx, MIBPayload);
 }
 
-void CallStackTrie::collectContextSizeInfo(
-    CallStackTrieNode *Node, std::vector<ContextTotalSize> &ContextSizeInfo) {
-  llvm::append_range(ContextSizeInfo, Node->ContextSizeInfo);
+void CallStackTrie::collectContextInfo(
+    CallStackTrieNode *Node, std::vector<ContextSizeTypePair> &ContextInfo) {
+  llvm::append_range(ContextInfo, Node->ContextInfo);
   for (auto &Caller : Node->Callers)
-    collectContextSizeInfo(Caller.second, ContextSizeInfo);
+    collectContextInfo(Caller.second, ContextInfo);
 }
 
 void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
@@ -283,6 +290,17 @@ void CallStackTrie::convertHotToNotCold(CallStackTrieNode *Node) {
     convertHotToNotCold(Caller.second);
 }
 
+// Helper to emit messages for non-cold contexts that are ignored for various
+// reasons when reporting of hinted bytes is enabled.
+static void emitIgnoredNonColdContextMessage(StringRef Tag,
+                                             uint64_t FullStackId,
+                                             StringRef Extra,
+                                             uint64_t TotalSize) {
+  errs() << "MemProf hinting: Total size for " << Tag
+         << " non-cold full allocation context hash " << FullStackId << Extra
+         << ": " << TotalSize << "\n";
+}
+
 // Copy over some or all of NewMIBNodes to the SavedMIBNodes vector, depending
 // on options that enable filtering out some NotCold contexts.
 static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
@@ -321,9 +339,7 @@ static void saveFilteredNewMIBNodes(std::vector<Metadata *> &NewMIBNodes,
       uint64_t TS =
           mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(1))
               ->getZExtValue();
-      errs() << "MemProf hinting: Total size for " << Tag
-             << " non-cold full allocation context hash " << FullStackId
-             << Extra << ": " << TS << "\n";
+      emitIgnoredNonColdContextMessage(Tag, FullStackId, Extra, TS);
     }
   };
 
@@ -430,10 +446,10 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
   // Trim context below the first node in a prefix with a single alloc type.
   // Add an MIB record for the current call stack prefix.
   if (hasSingleAllocType(Node->AllocTypes)) {
-    std::vector<ContextTotalSize> ContextSizeInfo;
-    collectContextSizeInfo(Node, ContextSizeInfo);
+    std::vector<ContextSizeTypePair> ContextInfo;
+    collectContextInfo(Node, ContextInfo);
     MIBNodes.push_back(createMIBNode(
-        Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextSizeInfo,
+        Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextInfo,
         MaxColdSize, BuiltFromExistingMetadata, TotalBytes, ColdBytes));
     return true;
   }
@@ -486,10 +502,10 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
   // non-cold allocation type.
   if (!CalleeHasAmbiguousCallerContext)
     return false;
-  std::vector<ContextTotalSize> ContextSizeInfo;
-  collectContextSizeInfo(Node, ContextSizeInfo);
+  std::vector<ContextSizeTypePair> ContextInfo;
+  collectContextInfo(Node, ContextInfo);
   MIBNodes.push_back(createMIBNode(
-      Ctx, MIBCallStack, AllocationType::NotCold, ContextSizeInfo, MaxColdSize,
+      Ctx, MIBCallStack, AllocationType::NotCold, ContextInfo, MaxColdSize,
       BuiltFromExistingMetadata, TotalBytes, ColdBytes));
   return true;
 }
@@ -503,9 +519,16 @@ void CallStackTrie::addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT,
   removeAnyExistingAmbiguousAttribute(CI);
   CI->addFnAttr(A);
   if (MemProfReportHintedSizes) {
-    std::vector<ContextTotalSize> ContextSizeInfo;
-    collectContextSizeInfo(Alloc, ContextSizeInfo);
-    for (const auto &[FullStackId, TotalSize] : ContextSizeInfo) {
+    std::vector<ContextSizeTypePair> ContextInfo;
+    collectContextInfo(Alloc, ContextInfo);
+    for (const auto &[CSI, OrigAT] : ContextInfo) {
+      const auto &[FullStackId, TotalSize] = CSI;
+      // If the original alloc type is not the one being applied as the hint,
+      // report that we ignored this context.
+      if (AT != OrigAT) {
+        emitIgnoredNonColdContextMessage("ignored", FullStackId, "", TotalSize);
+        continue;
+      }
       errs() << "MemProf hinting: Total size for full allocation context hash "
              << FullStackId << " and " << Descriptor << " alloc type "
              << getAllocTypeAttributeString(AT) << ": " << TotalSize << "\n";

diff  --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index f6a89a8ceb86a..a1f0f1d403c8f 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -400,10 +400,10 @@ for.end:                                          ; preds = %for.cond
 ;; with the full allocation context hash, type, and size in bytes.
 ; TOTALSIZESTHRESH60: Total size for full allocation context hash 8525406123785421946 and dominant alloc type cold: 10
 ; TOTALSIZESTHRESH60: Total size for full allocation context hash 11714230664165068698 and dominant alloc type cold: 10
-; TOTALSIZESTHRESH60: Total size for full allocation context hash 5725971306423925017 and dominant alloc type cold: 10
+; TOTALSIZESTHRESH60: Total size for ignored non-cold full allocation context hash 5725971306423925017: 10
 ; TOTALSIZESTHRESH60: Total size for full allocation context hash 16342802530253093571 and dominant alloc type cold: 10
 ; TOTALSIZESTHRESH60: Total size for full allocation context hash 18254812774972004394 and dominant alloc type cold: 10
-; TOTALSIZESTHRESH60: Total size for full allocation context hash 1093248920606587996 and dominant alloc type cold: 10
+; TOTALSIZESTHRESH60: Total size for ignored non-cold full allocation context hash 1093248920606587996: 10
 ; TOTALSIZESSINGLE: Total size for full allocation context hash 6792096022461663180 and single alloc type notcold: 10
 ; REMARKSINGLE: remark: memprof.cc:25:13: call in function main marked with memprof allocation attribute notcold
 ; TOTALSIZESSINGLE: Total size for full allocation context hash 15737101490731057601 and single alloc type cold: 10


        


More information about the llvm-commits mailing list