[llvm] [MemProf] Print full context hash when reporting hinted bytes (PR #114465)

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 31 14:13:06 PDT 2024


https://github.com/teresajohnson updated https://github.com/llvm/llvm-project/pull/114465

>From 11d13e4dd4d6267b9f0e2d1d69784599d0e3704b Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Thu, 31 Oct 2024 12:51:27 -0700
Subject: [PATCH 1/2] [MemProf] Print full context hash when reporting hinted
 bytes

Improve the information printed when -memprof-report-hinted-sizes is
enabled. Now print the full context hash computed from the original
profile, similar to what we do when reporting matching statistics. This
will make it easier to correlate with the profile.

Note that the full context hash must be computed at profile match time
and saved in the metadata and summary, because we may trim the context
during matching when it isn't needed for distinguishing hotness.
Similarly, due to the context trimming, we may have more than one full
context id and total size pair per MIB in the metadata and summary,
which now get a list of these pairs.

Remove the old aggregate size from the metadata and summary support.
One other change from the prior support is that we no longer write the
size information into the combined index for the LTO backends. The
backends don't use this information, so omitting it reduces unnecessary
bloat in distributed index files.
---
 .../include/llvm/Analysis/MemoryProfileInfo.h | 27 ++++--
 llvm/include/llvm/Bitcode/LLVMBitCodes.h      | 10 ++-
 llvm/include/llvm/IR/ModuleSummaryIndex.h     | 76 ++++++++++++++--
 llvm/lib/Analysis/MemoryProfileInfo.cpp       | 90 +++++++++++++------
 llvm/lib/Analysis/ModuleSummaryAnalysis.cpp   | 33 +++++--
 llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp   |  1 +
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp     | 55 +++++++-----
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     | 40 +++++++--
 llvm/lib/IR/Verifier.cpp                      | 37 ++++++--
 .../IPO/MemProfContextDisambiguation.cpp      | 66 +++++++++-----
 .../Instrumentation/MemProfiler.cpp           | 18 ++--
 .../ThinLTO/X86/memprof-aliased-location1.ll  |  4 +-
 .../ThinLTO/X86/memprof-aliased-location2.ll  |  4 +-
 llvm/test/ThinLTO/X86/memprof-basic.ll        | 12 ++-
 .../aliased-location1.ll                      |  4 +-
 .../aliased-location2.ll                      |  4 +-
 .../MemProfContextDisambiguation/basic.ll     | 12 ++-
 llvm/test/Transforms/PGOProfile/memprof.ll    | 29 +++---
 llvm/test/Verifier/memprof-metadata-bad.ll    |  2 +-
 19 files changed, 378 insertions(+), 146 deletions(-)

diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index edbce706953d18..55889c841b283e 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -28,16 +28,17 @@ AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity,
 /// the resulting metadata node.
 MDNode *buildCallstackMetadata(ArrayRef<uint64_t> CallStack, LLVMContext &Ctx);
 
+/// Build metadata from the provided list of full stack id and profiled size, to
+/// use when reporting of hinted sizes is enabled.
+MDNode *buildContextSizeMetadata(ArrayRef<ContextTotalSize> ContextSizeInfo,
+                                 LLVMContext &Ctx);
+
 /// Returns the stack node from an MIB metadata node.
 MDNode *getMIBStackNode(const MDNode *MIB);
 
 /// Returns the allocation type from an MIB metadata node.
 AllocationType getMIBAllocType(const MDNode *MIB);
 
-/// Returns the total size from an MIB metadata node, or 0 if it was not
-/// recorded.
-uint64_t getMIBTotalSize(const MDNode *MIB);
-
 /// Returns the string to use in attributes with the given type.
 std::string getAllocTypeAttributeString(AllocationType Type);
 
@@ -55,11 +56,15 @@ class CallStackTrie {
     // Allocation types for call context sharing the context prefix at this
     // node.
     uint8_t AllocTypes;
-    uint64_t TotalSize;
+    // If the user has requested reporting of hinted sizes, keep track of the
+    // associated full stack id and profiled sizes. Can have more than one
+    // after trimming (e.g. when building from metadata). This is only placed on
+    // the last (root-most) trie node for each allocation context.
+    std::vector<ContextTotalSize> ContextSizeInfo;
     // Map of caller stack id to the corresponding child Trie node.
     std::map<uint64_t, CallStackTrieNode *> Callers;
-    CallStackTrieNode(AllocationType Type, uint64_t TotalSize)
-        : AllocTypes(static_cast<uint8_t>(Type)), TotalSize(TotalSize) {}
+    CallStackTrieNode(AllocationType Type)
+        : AllocTypes(static_cast<uint8_t>(Type)) {}
   };
 
   // The node for the allocation at the root.
@@ -75,6 +80,11 @@ class CallStackTrie {
     delete Node;
   }
 
+  // Recursively build up a complete list of context size information from the
+  // trie nodes reached from the given Node, for hint size reporting.
+  void collectContextSizeInfo(CallStackTrieNode *Node,
+                              std::vector<ContextTotalSize> &ContextSizeInfo);
+
   // Recursive helper to trim contexts and create metadata nodes.
   bool buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
                      std::vector<uint64_t> &MIBCallStack,
@@ -93,7 +103,8 @@ class CallStackTrie {
   /// allocation call down to the bottom of the call stack (i.e. callee to
   /// caller order).
   void addCallStack(AllocationType AllocType, ArrayRef<uint64_t> StackIds,
-                    uint64_t TotalSize = 0);
+                    std::vector<ContextTotalSize> ContextSizeInfo =
+                        std::vector<ContextTotalSize>());
 
   /// Add the call stack context along with its allocation type from the MIB
   /// metadata to the Trie.
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 41a6447356c23b..130c92b28b3d5e 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -308,7 +308,7 @@ enum GlobalValueSummarySymtabCodes {
   FS_PERMODULE_CALLSITE_INFO = 26,
   // Summary of per-module allocation memprof metadata.
   // [nummib, nummib x (alloc type, numstackids, numstackids x stackidindex),
-  // [nummib x total size]?]
+  // [nummib x (numcontext x contextsizeindex)]?]
   FS_PERMODULE_ALLOC_INFO = 27,
   // Summary of combined index memprof callsite metadata.
   // [valueid, numstackindices, numver,
@@ -317,9 +317,15 @@ enum GlobalValueSummarySymtabCodes {
   // Summary of combined index allocation memprof metadata.
   // [nummib, numver,
   //  nummib x (alloc type, numstackids, numstackids x stackidindex),
-  //  numver x version, [nummib x total size]?]
+  //  numver x version]
   FS_COMBINED_ALLOC_INFO = 29,
+  // List of all stack ids referenced by index in the callsite and alloc infos.
+  // [n x stack id]
   FS_STACK_IDS = 30,
+  // List of all (full stack id, total size) pairs optionally referenced by
+  // index from the alloc info records.
+  // [n x (full stack id, total size)]
+  FS_CONTEXT_SIZE_INFOS = 31,
 };
 
 enum MetadataCodes {
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 1cfe7c15f97dbc..ccb6c8473f23ee 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -302,6 +302,14 @@ template <> struct DenseMapInfo<ValueInfo> {
   static unsigned getHashValue(ValueInfo I) { return (uintptr_t)I.getRef(); }
 };
 
+// For optional hinted size reporting, holds a pair of the full stack id
+// (pre-trimming, from the full context in the profile), and the associated
+// total profiled size.
+struct ContextTotalSize {
+  uint64_t FullStackId; // Id of the full, untrimmed context from the profile.
+  uint64_t TotalSize;   // Total profiled size associated with that context.
+};
+
 /// Summary of memprof callsite metadata.
 struct CallsiteInfo {
   // Actual callee function.
@@ -408,9 +416,15 @@ struct AllocInfo {
   // Vector of MIBs in this memprof metadata.
   std::vector<MIBInfo> MIBs;
 
-  // If requested, keep track of total profiled sizes for each MIB. This will be
-  // a vector of the same length and order as the MIBs vector, if non-empty.
-  std::vector<uint64_t> TotalSizes;
+  // If requested, keep track of full stack contexts and total profiled sizes
+  // for each MIB. This will be a vector of the same length and order as the
+  // MIBs vector, if non-empty. Note that each MIB in the summary can have
+  // multiple of these as we trim the contexts when possible during matching.
+  // For hinted size reporting we, however, want the original pre-trimmed full
+  // stack context id for better correlation with the profile. Note that these
+  // are indexes into the ContextSizeInfos list in the index, to enable
+  // deduplication.
+  std::vector<std::vector<unsigned>> ContextSizeInfoIndices;
 
   AllocInfo(std::vector<MIBInfo> MIBs) : MIBs(std::move(MIBs)) {
     Versions.push_back(0);
@@ -432,14 +446,21 @@ inline raw_ostream &operator<<(raw_ostream &OS, const AllocInfo &AE) {
   for (auto &M : AE.MIBs) {
     OS << "\t\t" << M << "\n";
   }
-  if (!AE.TotalSizes.empty()) {
-    OS << " TotalSizes per MIB:\n\t\t";
+  if (!AE.ContextSizeInfoIndices.empty()) {
+    OS << " ContextSizeInfo index per MIB:\n\t\t";
     First = true;
-    for (uint64_t TS : AE.TotalSizes) {
+    for (auto Indices : AE.ContextSizeInfoIndices) {
       if (!First)
         OS << ", ";
       First = false;
-      OS << TS << "\n";
+      bool FirstIndex = true;
+      for (uint64_t Index : Indices) {
+        if (!FirstIndex)
+          OS << ", ";
+        FirstIndex = false;
+        OS << Index;
+      }
+      OS << "\n";
     }
   }
   return OS;
@@ -1426,6 +1447,19 @@ class ModuleSummaryIndex {
   // built via releaseTemporaryMemory.
   DenseMap<uint64_t, unsigned> StackIdToIndex;
 
+  // List of unique ContextTotalSize structs (pair of the full stack id hash and
+  // its associated total profiled size). We use an index into this vector when
+  // referencing from the alloc summary to reduce the overall memory and size
+  // requirements, since often allocations may be duplicated due to inlining.
+  std::vector<ContextTotalSize> ContextSizeInfos;
+
+  // Temporary map while building the ContextSizeInfos list. Clear when index is
+  // completely built via releaseTemporaryMemory.
+  // Maps from full stack id to a map of total size to the assigned index.
+  // We need size in here too because due to stack truncation in the profile we
+  // can have the same full stack id and different sizes.
+  DenseMap<uint64_t, DenseMap<uint64_t, unsigned>> ContextToTotalSizeAndIndex;
+
   // YAML I/O support.
   friend yaml::MappingTraits<ModuleSummaryIndex>;
 
@@ -1470,6 +1504,9 @@ class ModuleSummaryIndex {
   size_t size() const { return GlobalValueMap.size(); }
 
   const std::vector<uint64_t> &stackIds() const { return StackIds; }
+  const std::vector<ContextTotalSize> &contextSizeInfos() const {
+    return ContextSizeInfos; // Unique (full stack id, total size) pairs.
+  }
 
   unsigned addOrGetStackIdIndex(uint64_t StackId) {
     auto Inserted = StackIdToIndex.insert({StackId, StackIds.size()});
@@ -1483,15 +1520,36 @@ class ModuleSummaryIndex {
     return StackIds[Index];
   }
 
+  unsigned addOrGetContextSizeIndex(ContextTotalSize ContextSizeInfo) {
+    // Per-stack-id map from total size to the index assigned to that pair.
+    auto &SizeMap = ContextToTotalSizeAndIndex[ContextSizeInfo.FullStackId];
+    const unsigned NextIndex = ContextSizeInfos.size();
+    auto Result = SizeMap.insert({ContextSizeInfo.TotalSize, NextIndex});
+    // Only a pair not seen before extends the list; otherwise reuse its index.
+    if (Result.second)
+      ContextSizeInfos.push_back(ContextSizeInfo);
+    assert(Result.first->first == ContextSizeInfo.TotalSize);
+    return Result.first->second;
+  }
+
+  ContextTotalSize getContextSizeInfoAtIndex(unsigned Index) const {
+    assert(ContextSizeInfos.size() > Index); // Index must be in bounds.
+    return ContextSizeInfos[Index];          // Returned by value (a copy).
+  }
+
   // Facility to release memory from data structures only needed during index
-  // construction (including while building combined index). Currently this only
+  // construction (including while building combined index). Currently this
   // releases the temporary map used while constructing a correspondence between
-  // stack ids and their index in the StackIds vector. Mostly impactful when
+  // stack ids and their index in the StackIds vector, and the similar map for
+  // ContextSizeInfos (it may hold fewer keys than entries). Impactful when
   // building a large combined index.
   void releaseTemporaryMemory() {
     assert(StackIdToIndex.size() == StackIds.size());
     StackIdToIndex.clear();
     StackIds.shrink_to_fit();
+    assert(ContextToTotalSizeAndIndex.size() <= ContextSizeInfos.size());
+    ContextToTotalSizeAndIndex.clear();
+    ContextSizeInfos.shrink_to_fit();
   }
 
   /// Convenience function for doing a DFS on a ValueInfo. Marks the function in
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index 2b49dce17b7931..885f2e4d040143 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -99,12 +99,6 @@ AllocationType llvm::memprof::getMIBAllocType(const MDNode *MIB) {
   return AllocationType::NotCold;
 }
 
-uint64_t llvm::memprof::getMIBTotalSize(const MDNode *MIB) {
-  if (MIB->getNumOperands() < 3)
-    return 0;
-  return mdconst::dyn_extract<ConstantInt>(MIB->getOperand(2))->getZExtValue();
-}
-
 std::string llvm::memprof::getAllocTypeAttributeString(AllocationType Type) {
   switch (Type) {
   case AllocationType::NotCold:
@@ -135,22 +129,22 @@ bool llvm::memprof::hasSingleAllocType(uint8_t AllocTypes) {
   return NumAllocTypes == 1;
 }
 
-void CallStackTrie::addCallStack(AllocationType AllocType,
-                                 ArrayRef<uint64_t> StackIds,
-                                 uint64_t TotalSize) {
+void CallStackTrie::addCallStack(
+    AllocationType AllocType, ArrayRef<uint64_t> StackIds,
+    std::vector<ContextTotalSize> ContextSizeInfo) {
   bool First = true;
   CallStackTrieNode *Curr = nullptr;
   for (auto StackId : StackIds) {
-    // If this is the first stack frame, add or update alloc node.
+    // If this is the first stack frame (the allocation itself), add or update
+    // the alloc node.
     if (First) {
       First = false;
       if (Alloc) {
         assert(AllocStackId == StackId);
         Alloc->AllocTypes |= static_cast<uint8_t>(AllocType);
-        Alloc->TotalSize += TotalSize;
       } else {
         AllocStackId = StackId;
-        Alloc = new CallStackTrieNode(AllocType, TotalSize);
+        Alloc = new CallStackTrieNode(AllocType);
       }
       Curr = Alloc;
       continue;
@@ -160,15 +154,18 @@ void CallStackTrie::addCallStack(AllocationType AllocType,
     if (Next != Curr->Callers.end()) {
       Curr = Next->second;
       Curr->AllocTypes |= static_cast<uint8_t>(AllocType);
-      Curr->TotalSize += TotalSize;
       continue;
     }
     // Otherwise add a new caller node.
-    auto *New = new CallStackTrieNode(AllocType, TotalSize);
+    auto *New = new CallStackTrieNode(AllocType);
     Curr->Callers[StackId] = New;
     Curr = New;
   }
   assert(Curr);
+  Curr->ContextSizeInfo.insert(Curr->ContextSizeInfo.end(),
+                               ContextSizeInfo.begin(), ContextSizeInfo.end());
+  std::vector<ContextTotalSize> AllContextSizeInfo;
+  collectContextSizeInfo(Curr, AllContextSizeInfo);
 }
 
 void CallStackTrie::addCallStack(MDNode *MIB) {
@@ -181,21 +178,55 @@ void CallStackTrie::addCallStack(MDNode *MIB) {
     assert(StackId);
     CallStack.push_back(StackId->getZExtValue());
   }
-  addCallStack(getMIBAllocType(MIB), CallStack, getMIBTotalSize(MIB));
+  std::vector<ContextTotalSize> ContextSizeInfo;
+  // Collect the context size information if it exists.
+  if (MIB->getNumOperands() > 2) {
+    for (unsigned I = 2; I < MIB->getNumOperands(); I++) {
+      MDNode *ContextSizePair = dyn_cast<MDNode>(MIB->getOperand(I));
+      assert(ContextSizePair->getNumOperands() == 2);
+      uint64_t FullStackId =
+          mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(0))
+              ->getZExtValue();
+      uint64_t TotalSize =
+          mdconst::dyn_extract<ConstantInt>(ContextSizePair->getOperand(1))
+              ->getZExtValue();
+      ContextSizeInfo.push_back({FullStackId, TotalSize});
+    }
+  }
+  addCallStack(getMIBAllocType(MIB), CallStack, std::move(ContextSizeInfo));
 }
 
 static MDNode *createMIBNode(LLVMContext &Ctx, ArrayRef<uint64_t> MIBCallStack,
-                             AllocationType AllocType, uint64_t TotalSize) {
+                             AllocationType AllocType,
+                             ArrayRef<ContextTotalSize> ContextSizeInfo) {
   SmallVector<Metadata *> MIBPayload(
       {buildCallstackMetadata(MIBCallStack, Ctx)});
   MIBPayload.push_back(
       MDString::get(Ctx, getAllocTypeAttributeString(AllocType)));
-  if (TotalSize)
-    MIBPayload.push_back(ValueAsMetadata::get(
-        ConstantInt::get(Type::getInt64Ty(Ctx), TotalSize)));
+  // Append one (full stack id, total size) pair node per context, if any were
+  // recorded; an empty list leaves the MIB with just stack and alloc type.
+  auto *Int64Ty = Type::getInt64Ty(Ctx);
+  for (const ContextTotalSize &Info : ContextSizeInfo) {
+    auto *FullStackIdMD =
+        ValueAsMetadata::get(ConstantInt::get(Int64Ty, Info.FullStackId));
+    auto *TotalSizeMD =
+        ValueAsMetadata::get(ConstantInt::get(Int64Ty, Info.TotalSize));
+    MIBPayload.push_back(MDNode::get(Ctx, {FullStackIdMD, TotalSizeMD}));
+  }
   return MDNode::get(Ctx, MIBPayload);
 }
 
+void CallStackTrie::collectContextSizeInfo(
+    CallStackTrieNode *Node, std::vector<ContextTotalSize> &ContextSizeInfo) {
+  // Append the entries recorded directly on this node, then walk every caller
+  // subtree so the output covers all contexts reachable from Node.
+  const std::vector<ContextTotalSize> &Own = Node->ContextSizeInfo;
+  ContextSizeInfo.insert(ContextSizeInfo.end(), Own.begin(), Own.end());
+  // A leaf has no callers, so the loop below simply does nothing for it.
+  for (const auto &Caller : Node->Callers)
+    collectContextSizeInfo(Caller.second, ContextSizeInfo);
+}
+
 // Recursive helper to trim contexts and create metadata nodes.
 // Caller should have pushed Node's loc to MIBCallStack. Doing this in the
 // caller makes it simpler to handle the many early returns in this method.
@@ -206,8 +237,10 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
   // Trim context below the first node in a prefix with a single alloc type.
   // Add an MIB record for the current call stack prefix.
   if (hasSingleAllocType(Node->AllocTypes)) {
+    std::vector<ContextTotalSize> ContextSizeInfo;
+    collectContextSizeInfo(Node, ContextSizeInfo);
     MIBNodes.push_back(createMIBNode(
-        Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, Node->TotalSize));
+        Ctx, MIBCallStack, (AllocationType)Node->AllocTypes, ContextSizeInfo));
     return true;
   }
 
@@ -243,8 +276,10 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
   // non-cold allocation type.
   if (!CalleeHasAmbiguousCallerContext)
     return false;
+  std::vector<ContextTotalSize> ContextSizeInfo;
+  collectContextSizeInfo(Node, ContextSizeInfo);
   MIBNodes.push_back(createMIBNode(Ctx, MIBCallStack, AllocationType::NotCold,
-                                   Node->TotalSize));
+                                   ContextSizeInfo));
   return true;
 }
 
@@ -256,11 +291,14 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
   if (hasSingleAllocType(Alloc->AllocTypes)) {
     addAllocTypeAttribute(Ctx, CI, (AllocationType)Alloc->AllocTypes);
     if (MemProfReportHintedSizes) {
-      assert(Alloc->TotalSize);
-      errs() << "Total size for allocation with location hash " << AllocStackId
-             << " and single alloc type "
-             << getAllocTypeAttributeString((AllocationType)Alloc->AllocTypes)
-             << ": " << Alloc->TotalSize << "\n";
+      std::vector<ContextTotalSize> ContextSizeInfo;
+      collectContextSizeInfo(Alloc, ContextSizeInfo);
+      for (const auto &Info : ContextSizeInfo) {
+        errs() << "Total size for full allocation context hash "
+               << Info.FullStackId << " and single alloc type "
+               << getAllocTypeAttributeString((AllocationType)Alloc->AllocTypes)
+               << ": " << Info.TotalSize << "\n";
+      }
     }
     return false;
   }
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 004e8b76a3c851..3273de51a79d9f 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -523,6 +523,7 @@ static void computeFunctionSummary(
       if (MemProfMD) {
         std::vector<MIBInfo> MIBs;
         std::vector<uint64_t> TotalSizes;
+        std::vector<std::vector<unsigned>> ContextSizeInfoIndices;
         for (auto &MDOp : MemProfMD->operands()) {
           auto *MIBMD = cast<const MDNode>(MDOp);
           MDNode *StackNode = getMIBStackNode(MIBMD);
@@ -540,18 +541,34 @@ static void computeFunctionSummary(
             if (StackIdIndices.empty() || StackIdIndices.back() != StackIdIdx)
               StackIdIndices.push_back(StackIdIdx);
           }
+          // If we have context size information, collect it for inclusion in
+          // the summary.
+          assert(MIBMD->getNumOperands() > 2 || !MemProfReportHintedSizes);
+          if (MIBMD->getNumOperands() > 2) {
+            std::vector<unsigned> ContextSizeIndices;
+            for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
+              MDNode *ContextSizePair = dyn_cast<MDNode>(MIBMD->getOperand(I));
+              assert(ContextSizePair->getNumOperands() == 2);
+              uint64_t FullStackId = mdconst::dyn_extract<ConstantInt>(
+                                         ContextSizePair->getOperand(0))
+                                         ->getZExtValue();
+              uint64_t TS = mdconst::dyn_extract<ConstantInt>(
+                                ContextSizePair->getOperand(1))
+                                ->getZExtValue();
+              ContextSizeIndices.push_back(
+                  Index.addOrGetContextSizeIndex({FullStackId, TS}));
+            }
+            ContextSizeInfoIndices.push_back(std::move(ContextSizeIndices));
+          }
           MIBs.push_back(
               MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices)));
-          if (MemProfReportHintedSizes) {
-            auto TotalSize = getMIBTotalSize(MIBMD);
-            assert(TotalSize);
-            TotalSizes.push_back(TotalSize);
-          }
         }
         Allocs.push_back(AllocInfo(std::move(MIBs)));
-        if (MemProfReportHintedSizes) {
-          assert(Allocs.back().MIBs.size() == TotalSizes.size());
-          Allocs.back().TotalSizes = std::move(TotalSizes);
+        assert(!ContextSizeInfoIndices.empty() || !MemProfReportHintedSizes);
+        if (!ContextSizeInfoIndices.empty()) {
+          assert(Allocs.back().MIBs.size() == ContextSizeInfoIndices.size());
+          Allocs.back().ContextSizeInfoIndices =
+              std::move(ContextSizeInfoIndices);
         }
       } else if (!InstCallsite.empty()) {
         SmallVector<unsigned> StackIdIndices;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index b7ed9cdf631454..419df0d78e1796 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -328,6 +328,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
       STRINGIFY_CODE(FS, COMBINED_CALLSITE_INFO)
       STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO)
       STRINGIFY_CODE(FS, STACK_IDS)
+      STRINGIFY_CODE(FS, CONTEXT_SIZE_INFOS)
     }
   case bitc::METADATA_ATTACHMENT_ID:
     switch (CodeID) {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 446c98c8cecd88..6d95f68f328baf 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -987,6 +987,11 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
   /// ids from the lists in the callsite and alloc entries to the index.
   std::vector<uint64_t> StackIds;
 
+  // Saves the context total size information from the CONTEXT_SIZE_INFOS record
+  // to consult when adding this from the lists in the alloc entries to the
+  // index.
+  std::vector<ContextTotalSize> ContextSizeInfos;
+
 public:
   ModuleSummaryIndexBitcodeReader(
       BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex,
@@ -7997,6 +8002,14 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       break;
     }
 
+    case bitc::FS_CONTEXT_SIZE_INFOS: { // [n x (fullstackid, totalsize)]
+      // Save context size infos in the reader to consult when adding them from
+      // the lists in the alloc node entries.
+      for (auto R = Record.begin(); R != Record.end(); R += 2)
+        ContextSizeInfos.push_back({*R, *(R + 1)});
+      break;
+    }
+
     case bitc::FS_PERMODULE_CALLSITE_INFO: {
       unsigned ValueID = Record[0];
       SmallVector<unsigned> StackIdList;
@@ -8052,18 +8065,30 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
         }
         MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
       }
-      std::vector<uint64_t> TotalSizes;
-      // We either have no sizes or NumMIBs of them.
-      assert(I == Record.size() || Record.size() - I == NumMIBs);
+      // We either have nothing left or at least NumMIBs context size info
+      // indices left.
+      assert(I == Record.size() || Record.size() - I >= NumMIBs);
+      std::vector<std::vector<unsigned>> AllContextSizeIndices;
       if (I < Record.size()) {
         MIBsRead = 0;
-        while (MIBsRead++ < NumMIBs)
-          TotalSizes.push_back(Record[I++]);
+        while (MIBsRead++ < NumMIBs) {
+          unsigned NumContextSizeInfoEntries = Record[I++];
+          assert(Record.size() - I >= NumContextSizeInfoEntries);
+          std::vector<unsigned> ContextSizeIndices;
+          for (unsigned J = 0; J < NumContextSizeInfoEntries; J++) {
+            assert(Record[I] < ContextSizeInfos.size());
+            ContextSizeIndices.push_back(TheIndex.addOrGetContextSizeIndex(
+                ContextSizeInfos[Record[I++]]));
+          }
+          AllContextSizeIndices.push_back(std::move(ContextSizeIndices));
+        }
       }
       PendingAllocs.push_back(AllocInfo(std::move(MIBs)));
-      if (!TotalSizes.empty()) {
-        assert(PendingAllocs.back().MIBs.size() == TotalSizes.size());
-        PendingAllocs.back().TotalSizes = std::move(TotalSizes);
+      if (!AllContextSizeIndices.empty()) {
+        assert(PendingAllocs.back().MIBs.size() ==
+               AllContextSizeIndices.size());
+        PendingAllocs.back().ContextSizeInfoIndices =
+            std::move(AllContextSizeIndices);
       }
       break;
     }
@@ -8091,21 +8116,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       SmallVector<uint8_t> Versions;
       for (unsigned J = 0; J < NumVersions; J++)
         Versions.push_back(Record[I++]);
-      std::vector<uint64_t> TotalSizes;
-      // We either have no sizes or NumMIBs of them.
-      assert(I == Record.size() || Record.size() - I == NumMIBs);
-      if (I < Record.size()) {
-        MIBsRead = 0;
-        while (MIBsRead++ < NumMIBs) {
-          TotalSizes.push_back(Record[I++]);
-        }
-      }
+      assert(I == Record.size());
       PendingAllocs.push_back(
           AllocInfo(std::move(Versions), std::move(MIBs)));
-      if (!TotalSizes.empty()) {
-        assert(PendingAllocs.back().MIBs.size() == TotalSizes.size());
-        PendingAllocs.back().TotalSizes = std::move(TotalSizes);
-      }
       break;
     }
     }
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index ee9cc4b6e0c0eb..867470426962dc 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -4195,7 +4195,8 @@ static void writeFunctionHeapProfileRecords(
     BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev,
     unsigned AllocAbbrev, bool PerModule,
     std::function<unsigned(const ValueInfo &VI)> GetValueID,
-    std::function<unsigned(unsigned)> GetStackIndex) {
+    std::function<unsigned(unsigned)> GetStackIndex,
+    bool WriteContextSizeInfoIndex) {
   SmallVector<uint64_t> Record;
 
   for (auto &CI : FS->callsites()) {
@@ -4237,10 +4238,14 @@ static void writeFunctionHeapProfileRecords(
       for (auto V : AI.Versions)
         Record.push_back(V);
     }
-    assert(AI.TotalSizes.empty() || AI.TotalSizes.size() == AI.MIBs.size());
-    if (!AI.TotalSizes.empty()) {
-      for (auto Size : AI.TotalSizes)
-        Record.push_back(Size);
+    assert(AI.ContextSizeInfoIndices.empty() ||
+           AI.ContextSizeInfoIndices.size() == AI.MIBs.size());
+    if (WriteContextSizeInfoIndex && !AI.ContextSizeInfoIndices.empty()) {
+      for (auto Indices : AI.ContextSizeInfoIndices) {
+        Record.push_back(Indices.size());
+        for (auto Id : Indices)
+          Record.push_back(Id);
+      }
     }
     Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
                                 : bitc::FS_COMBINED_ALLOC_INFO,
@@ -4267,7 +4272,8 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
       Stream, FS, CallsiteAbbrev, AllocAbbrev,
       /*PerModule*/ true,
       /*GetValueId*/ [&](const ValueInfo &VI) { return getValueId(VI); },
-      /*GetStackIndex*/ [&](unsigned I) { return I; });
+      /*GetStackIndex*/ [&](unsigned I) { return I; },
+      /*WriteContextSizeInfoIndex*/ true);
 
   auto SpecialRefCnts = FS->specialRefCounts();
   NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
@@ -4404,6 +4410,24 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
     Stream.EmitRecord(bitc::FS_STACK_IDS, Index->stackIds(), StackIdAbbvId);
   }
 
+  SmallVector<uint64_t, 64> NameVals;
+  if (!Index->contextSizeInfos().empty()) {
+    auto ContextSizeInfoAbbv = std::make_shared<BitCodeAbbrev>();
+    ContextSizeInfoAbbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_SIZE_INFOS));
+    // numids x (fullStackid, totalsize)
+    ContextSizeInfoAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    ContextSizeInfoAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    unsigned ContextSizeInfoAbbvId =
+        Stream.EmitAbbrev(std::move(ContextSizeInfoAbbv));
+    for (const auto &Info : Index->contextSizeInfos()) {
+      NameVals.push_back(Info.FullStackId);
+      NameVals.push_back(Info.TotalSize);
+    }
+    Stream.EmitRecord(bitc::FS_CONTEXT_SIZE_INFOS, NameVals,
+                      ContextSizeInfoAbbvId);
+    NameVals.clear();
+  }
+
   // Abbrev for FS_PERMODULE_PROFILE.
   Abbv = std::make_shared<BitCodeAbbrev>();
   Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE));
@@ -4489,7 +4513,6 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
   unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
 
-  SmallVector<uint64_t, 64> NameVals;
   // Iterate over the list of functions instead of the Index to
   // ensure the ordering is stable.
   for (const Function &F : M) {
@@ -4757,7 +4780,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
           // the case of distributed indexes).
           assert(StackIdIndicesToIndex.contains(I));
           return StackIdIndicesToIndex[I];
-        });
+        },
+        /*WriteContextSizeInfoIndex*/ false);
 
     NameVals.push_back(*ValueId);
     assert(ModuleIdMap.count(FS->modulePath()));
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index ee807ca13787d5..61f9c0cfe69f2b 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4998,14 +4998,35 @@ void Verifier::visitMemProfMetadata(Instruction &I, MDNode *MD) {
     MDNode *StackMD = dyn_cast<MDNode>(MIB->getOperand(0));
     visitCallStackMetadata(StackMD);
 
-    // Check that remaining operands, except possibly the last, are MDString.
-    Check(llvm::all_of(MIB->operands().drop_front().drop_back(),
-                       [](const MDOperand &Op) { return isa<MDString>(Op); }),
-          "Not all !memprof MemInfoBlock operands 1 to N-1 are MDString", MIB);
-    // The last operand might be the total profiled size so can be an integer.
-    auto &LastOperand = MIB->operands().back();
-    Check(isa<MDString>(LastOperand) || mdconst::hasa<ConstantInt>(LastOperand),
-          "Last !memprof MemInfoBlock operand not MDString or int", MIB);
+    // The next set of 1 or more operands should be MDString.
+    unsigned I = 1;
+    for (; I < MIB->getNumOperands(); ++I) {
+      if (!isa<MDString>(MIB->getOperand(I))) {
+        Check(I > 1,
+              "!memprof MemInfoBlock second operand should be an MDString",
+              MIB);
+        break;
+      }
+    }
+
+    // Any remaining operands should be MDNodes that are pairs of integers.
+    for (; I < MIB->getNumOperands(); ++I) {
+      MDNode *OpNode = dyn_cast<MDNode>(MIB->getOperand(I));
+      Check(OpNode, "Not all !memprof MemInfoBlock operands 2 to N are MDNode",
+            MIB);
+      Check(OpNode->getNumOperands() == 2,
+            "Not all !memprof MemInfoBlock operands 2 to N are MDNode with 2 "
+            "operands",
+            MIB);
+      // Check that all of OpNode's operands are ConstantInt.
+      Check(llvm::all_of(OpNode->operands(),
+                         [](const MDOperand &Op) {
+                           return mdconst::hasa<ConstantInt>(Op);
+                         }),
+            "Not all !memprof MemInfoBlock operands 2 to N are MDNode with "
+            "ConstantInt operands",
+            MIB);
+    }
   }
 }
 
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index da5ded23ecc045..6d5b790e5f6793 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -475,7 +475,8 @@ class CallsiteContextGraph {
   void addStackNodesForMIB(ContextNode *AllocNode,
                            CallStack<NodeT, IteratorT> &StackContext,
                            CallStack<NodeT, IteratorT> &CallsiteContext,
-                           AllocationType AllocType, uint64_t TotalSize);
+                           AllocationType AllocType,
+                           ArrayRef<ContextTotalSize> ContextSizeInfo);
 
   /// Matches all callsite metadata (or summary) to the nodes created for
   /// allocation memprof MIB metadata, synthesizing new nodes to reflect any
@@ -705,9 +706,10 @@ class CallsiteContextGraph {
   /// Map from each context ID to the AllocationType assigned to that context.
   DenseMap<uint32_t, AllocationType> ContextIdToAllocationType;
 
-  /// Map from each contextID to the profiled aggregate allocation size,
+  /// Map from each contextID to the profiled full contexts and their total
+  /// sizes (there may be more than one due to context trimming),
   /// optionally populated when requested (via MemProfReportHintedSizes).
-  DenseMap<uint32_t, uint64_t> ContextIdToTotalSize;
+  DenseMap<uint32_t, std::vector<ContextTotalSize>> ContextIdToContextSizeInfos;
 
   /// Identifies the context node created for a stack id when adding the MIB
   /// contexts to the graph. This is used to locate the context nodes when
@@ -1203,8 +1205,7 @@ template <class NodeT, class IteratorT>
 void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
     ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
     CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType,
-    uint64_t TotalSize) {
-  assert(!MemProfReportHintedSizes || TotalSize > 0);
+    ArrayRef<ContextTotalSize> ContextSizeInfo) {
   // Treating the hot alloc type as NotCold before the disambiguation for "hot"
   // is done.
   if (AllocType == AllocationType::Hot)
@@ -1213,8 +1214,9 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
   ContextIdToAllocationType[++LastContextId] = AllocType;
 
   if (MemProfReportHintedSizes) {
-    assert(TotalSize);
-    ContextIdToTotalSize[LastContextId] = TotalSize;
+    assert(!ContextSizeInfo.empty());
+    auto &Entry = ContextIdToContextSizeInfos[LastContextId];
+    Entry.insert(Entry.begin(), ContextSizeInfo.begin(), ContextSizeInfo.end());
   }
 
   // Update alloc type and context ids for this MIB.
@@ -1259,10 +1261,6 @@ CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
     assert(ContextIdToAllocationType.count(OldId));
     // The new context has the same allocation type as original.
     ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
-    // For now set this to 0 so we don't duplicate sizes. Not clear how to divvy
-    // up the size. Assume that if we are able to duplicate context ids that we
-    // will be able to disambiguate all copies.
-    ContextIdToTotalSize[LastContextId] = 0;
   }
   return NewContextIds;
 }
@@ -1961,12 +1959,28 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
           // Add all of the MIBs and their stack nodes.
           for (auto &MDOp : MemProfMD->operands()) {
             auto *MIBMD = cast<const MDNode>(MDOp);
+            std::vector<ContextTotalSize> ContextSizeInfo;
+            // Collect the context size information if it exists.
+            if (MIBMD->getNumOperands() > 2) {
+              for (unsigned I = 2; I < MIBMD->getNumOperands(); I++) {
+                MDNode *ContextSizePair =
+                    dyn_cast<MDNode>(MIBMD->getOperand(I));
+                assert(ContextSizePair->getNumOperands() == 2);
+                uint64_t FullStackId = mdconst::dyn_extract<ConstantInt>(
+                                           ContextSizePair->getOperand(0))
+                                           ->getZExtValue();
+                uint64_t TotalSize = mdconst::dyn_extract<ConstantInt>(
+                                         ContextSizePair->getOperand(1))
+                                         ->getZExtValue();
+                ContextSizeInfo.push_back({FullStackId, TotalSize});
+              }
+            }
             MDNode *StackNode = getMIBStackNode(MIBMD);
             assert(StackNode);
             CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
             addStackNodesForMIB<MDNode, MDNode::op_iterator>(
                 AllocNode, StackContext, CallsiteContext,
-                getMIBAllocType(MIBMD), getMIBTotalSize(MIBMD));
+                getMIBAllocType(MIBMD), ContextSizeInfo);
           }
           assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
           // Memprof and callsite metadata on memory allocations no longer
@@ -2042,17 +2056,21 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
               EmptyContext;
           unsigned I = 0;
           assert(!MemProfReportHintedSizes ||
-                 AN.TotalSizes.size() == AN.MIBs.size());
+                 AN.ContextSizeInfoIndices.size() == AN.MIBs.size());
           // Now add all of the MIBs and their stack nodes.
           for (auto &MIB : AN.MIBs) {
             CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
                 StackContext(&MIB);
-            uint64_t TotalSize = 0;
-            if (MemProfReportHintedSizes)
-              TotalSize = AN.TotalSizes[I];
+            std::vector<ContextTotalSize> ContextSizeInfo;
+            if (MemProfReportHintedSizes) {
+              for (auto Id : AN.ContextSizeInfoIndices[I]) {
+                auto Info = Index.getContextSizeInfoAtIndex(Id);
+                ContextSizeInfo.push_back({Info.FullStackId, Info.TotalSize});
+              }
+            }
             addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
                 AllocNode, StackContext, EmptyContext, MIB.AllocType,
-                TotalSize);
+                ContextSizeInfo);
             I++;
           }
           assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
@@ -2824,13 +2842,17 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::printTotalSizes(
     std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
     std::sort(SortedIds.begin(), SortedIds.end());
     for (auto Id : SortedIds) {
-      auto SizeI = ContextIdToTotalSize.find(Id);
-      assert(SizeI != ContextIdToTotalSize.end());
       auto TypeI = ContextIdToAllocationType.find(Id);
       assert(TypeI != ContextIdToAllocationType.end());
-      OS << getAllocTypeString((uint8_t)TypeI->second) << " context " << Id
-         << " with total size " << SizeI->second << " is "
-         << getAllocTypeString(Node->AllocTypes) << " after cloning\n";
+      auto CSI = ContextIdToContextSizeInfos.find(Id);
+      if (CSI != ContextIdToContextSizeInfos.end()) {
+        for (auto &Info : CSI->second) {
+          OS << getAllocTypeString((uint8_t)TypeI->second)
+             << " full allocation context " << Info.FullStackId
+             << " with total size " << Info.TotalSize << " is "
+             << getAllocTypeString(Node->AllocTypes) << " after cloning\n";
+        }
+      }
     }
   }
 }
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 4a43120c9a9e7f..42c01fe832572e 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -716,19 +716,22 @@ computeFullStackId(const std::vector<memprof::Frame> &CallStack) {
 }
 
 static AllocationType addCallStack(CallStackTrie &AllocTrie,
-                                   const AllocationInfo *AllocInfo) {
+                                   const AllocationInfo *AllocInfo,
+                                   uint64_t FullStackId) {
   SmallVector<uint64_t> StackIds;
   for (const auto &StackFrame : AllocInfo->CallStack)
     StackIds.push_back(computeStackId(StackFrame));
   auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
                                 AllocInfo->Info.getAllocCount(),
                                 AllocInfo->Info.getTotalLifetime());
-  uint64_t TotalSize = 0;
+  std::vector<ContextTotalSize> ContextSizeInfo;
   if (MemProfReportHintedSizes) {
-    TotalSize = AllocInfo->Info.getTotalSize();
+    auto TotalSize = AllocInfo->Info.getTotalSize();
     assert(TotalSize);
+    assert(FullStackId != 0);
+    ContextSizeInfo.push_back({FullStackId, TotalSize});
   }
-  AllocTrie.addCallStack(AllocType, StackIds, TotalSize);
+  AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));
   return AllocType;
 }
 
@@ -964,11 +967,14 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
           if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
                                                  InlinedCallStack)) {
             NumOfMemProfMatchedAllocContexts++;
-            auto AllocType = addCallStack(AllocTrie, AllocInfo);
+            uint64_t FullStackId = 0;
+            if (ClPrintMemProfMatchInfo || MemProfReportHintedSizes)
+              FullStackId = computeFullStackId(AllocInfo->CallStack);
+            auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);
             // Record information about the allocation if match info printing
             // was requested.
             if (ClPrintMemProfMatchInfo) {
-              auto FullStackId = computeFullStackId(AllocInfo->CallStack);
+              assert(FullStackId != 0);
               FullStackIdToAllocMatchInfo[FullStackId] = {
                   AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};
             }
diff --git a/llvm/test/ThinLTO/X86/memprof-aliased-location1.ll b/llvm/test/ThinLTO/X86/memprof-aliased-location1.ll
index 42819d5421ca0f..3e8aa9766d6c5f 100644
--- a/llvm/test/ThinLTO/X86/memprof-aliased-location1.ll
+++ b/llvm/test/ThinLTO/X86/memprof-aliased-location1.ll
@@ -62,9 +62,9 @@ attributes #0 = { noinline optnone }
 !0 = !{i64 8632435727821051414}
 !1 = !{i64 -3421689549917153178}
 !2 = !{!3, !5}
-!3 = !{!4, !"notcold", i64 100}
+!3 = !{!4, !"notcold"}
 !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!5 = !{!6, !"cold", i64 400}
+!5 = !{!6, !"cold"}
 !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
 !7 = !{i64 9086428284934609951}
 !8 = !{i64 -5964873800580613432}
diff --git a/llvm/test/ThinLTO/X86/memprof-aliased-location2.ll b/llvm/test/ThinLTO/X86/memprof-aliased-location2.ll
index 663f8525043c2f..9169cc03d08d6f 100644
--- a/llvm/test/ThinLTO/X86/memprof-aliased-location2.ll
+++ b/llvm/test/ThinLTO/X86/memprof-aliased-location2.ll
@@ -62,9 +62,9 @@ attributes #0 = { noinline optnone }
 !0 = !{i64 8632435727821051414}
 !1 = !{i64 -3421689549917153178}
 !2 = !{!3, !5}
-!3 = !{!4, !"notcold", i64 100}
+!3 = !{!4, !"notcold"}
 !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!5 = !{!6, !"cold", i64 400}
+!5 = !{!6, !"cold"}
 !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
 !7 = !{i64 9086428284934609951}
 !8 = !{i64 -5964873800580613432}
diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll
index 6922dbfd368467..96d5459c78793e 100644
--- a/llvm/test/ThinLTO/X86/memprof-basic.ll
+++ b/llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -128,13 +128,16 @@ attributes #0 = { noinline optnone }
 !0 = !{i64 8632435727821051414}
 !1 = !{i64 -3421689549917153178}
 !2 = !{!3, !5}
-!3 = !{!4, !"notcold", i64 100}
+!3 = !{!4, !"notcold", !10}
 !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!5 = !{!6, !"cold", i64 400}
+!5 = !{!6, !"cold", !11, !12}
 !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
 !7 = !{i64 9086428284934609951}
 !8 = !{i64 -5964873800580613432}
 !9 = !{i64 2732490490862098848}
+!10 = !{i64 123, i64 100}
+!11 = !{i64 456, i64 200}
+!12 = !{i64 789, i64 300}
 
 
 ; DUMP: CCG before cloning:
@@ -267,8 +270,9 @@ attributes #0 = { noinline optnone }
 ; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
 ; DUMP:		Clone of [[BAR]]
 
-; SIZES: NotCold context 1 with total size 100 is NotCold after cloning
-; SIZES: Cold context 2 with total size 400 is Cold after cloning
+; SIZES: NotCold full allocation context 123 with total size 100 is NotCold after cloning
+; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning
+; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning
 
 ; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
 ; REMARKS: created clone _Z3barv.memprof.1
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll
index 8f9df20471e41c..c2810dfabffbd7 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location1.ll
@@ -66,9 +66,9 @@ attributes #6 = { builtin }
 !0 = !{i64 8632435727821051414}
 !1 = !{i64 -3421689549917153178}
 !2 = !{!3, !5}
-!3 = !{!4, !"notcold", i64 100}
+!3 = !{!4, !"notcold"}
 !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!5 = !{!6, !"cold", i64 400}
+!5 = !{!6, !"cold"}
 !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
 !7 = !{i64 9086428284934609951}
 !8 = !{i64 -5964873800580613432}
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll
index c3c164d4928632..068e1f116519e8 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/aliased-location2.ll
@@ -66,9 +66,9 @@ attributes #6 = { builtin }
 !0 = !{i64 8632435727821051414}
 !1 = !{i64 -3421689549917153178}
 !2 = !{!3, !5}
-!3 = !{!4, !"notcold", i64 100}
+!3 = !{!4, !"notcold"}
 !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!5 = !{!6, !"cold", i64 400}
+!5 = !{!6, !"cold"}
 !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
 !7 = !{i64 9086428284934609951}
 !8 = !{i64 -5964873800580613432}
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
index a82f872d51c7d5..952e2519bbf0b3 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
@@ -106,13 +106,16 @@ attributes #6 = { builtin }
 !0 = !{i64 8632435727821051414}
 !1 = !{i64 -3421689549917153178}
 !2 = !{!3, !5}
-!3 = !{!4, !"notcold", i64 100}
+!3 = !{!4, !"notcold", !10}
 !4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
-!5 = !{!6, !"cold", i64 400}
+!5 = !{!6, !"cold", !11, !12}
 !6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
 !7 = !{i64 9086428284934609951}
 !8 = !{i64 -5964873800580613432}
 !9 = !{i64 2732490490862098848}
+!10 = !{i64 123, i64 100}
+!11 = !{i64 456, i64 200}
+!12 = !{i64 789, i64 300}
 
 
 ; DUMP: CCG before cloning:
@@ -249,8 +252,9 @@ attributes #6 = { builtin }
 ; REMARKS: call in clone _Z3bazv assigned to call function clone _Z3barv
 ; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
 
-; SIZES: NotCold context 1 with total size 100 is NotCold after cloning
-; SIZES: Cold context 2 with total size 400 is Cold after cloning
+; SIZES: NotCold full allocation context 123 with total size 100 is NotCold after cloning
+; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning
+; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning
 
 ; IR: define {{.*}} @main
 ;; The first call to foo does not allocate cold memory. It should call the
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index e1457ca7251ed8..d6c86bb7ad5a8a 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -335,17 +335,24 @@ for.end:                                          ; preds = %for.cond
 ; MEMPROF: ![[C11]] = !{i64 1544787832369987002}
 
 ;; For non-context sensitive allocations that get attributes we emit a message
-;; with the allocation hash, type, and size in bytes.
-; TOTALSIZES: Total size for allocation with location hash 6792096022461663180 and single alloc type notcold: 10
-; TOTALSIZES: Total size for allocation with location hash 15737101490731057601 and single alloc type cold: 10
-;; For context sensitive allocations the size in bytes is included on the MIB
-;; metadata.
-; TOTALSIZES: !"cold", i64 10}
-; TOTALSIZES: !"cold", i64 10}
-; TOTALSIZES: !"notcold", i64 10}
-; TOTALSIZES: !"cold", i64 20}
-; TOTALSIZES: !"notcold", i64 10}
-
+;; with the full allocation context hash, type, and size in bytes.
+; TOTALSIZES: Total size for full allocation context hash 6792096022461663180 and single alloc type notcold: 10
+; TOTALSIZES: Total size for full allocation context hash 15737101490731057601 and single alloc type cold: 10
+;; For context sensitive allocations the full context hash and size in bytes
+;; are in separate metadata nodes included on the MIB metadata.
+; TOTALSIZES: !"cold", ![[CONTEXT1:[0-9]+]]}
+; TOTALSIZES: ![[CONTEXT1]] = !{i64 8525406123785421946, i64 10}
+; TOTALSIZES: !"cold", ![[CONTEXT2:[0-9]+]]}
+; TOTALSIZES: ![[CONTEXT2]] = !{i64 -6732513409544482918, i64 10}
+; TOTALSIZES: !"notcold", ![[CONTEXT3:[0-9]+]]}
+; TOTALSIZES: ![[CONTEXT3]] = !{i64 5725971306423925017, i64 10}
+;; There can be more than one context id / size pair due to context trimming
+;; when we match.
+; TOTALSIZES: !"cold", ![[CONTEXT4:[0-9]+]], ![[CONTEXT5:[0-9]+]]}
+; TOTALSIZES: ![[CONTEXT4]] = !{i64 -2103941543456458045, i64 10}
+; TOTALSIZES: ![[CONTEXT5]] = !{i64 -191931298737547222, i64 10}
+; TOTALSIZES: !"notcold", ![[CONTEXT6:[0-9]+]]}
+; TOTALSIZES: ![[CONTEXT6]] = !{i64 1093248920606587996, i64 10}
 
 ; MEMPROFNOCOLINFO: #[[A1]] = { builtin allocsize(0) "memprof"="notcold" }
 ; MEMPROFNOCOLINFO: #[[A2]] = { builtin allocsize(0) "memprof"="cold" }
diff --git a/llvm/test/Verifier/memprof-metadata-bad.ll b/llvm/test/Verifier/memprof-metadata-bad.ll
index f4f1f6bb0a4635..b8c2c2d8a2c993 100644
--- a/llvm/test/Verifier/memprof-metadata-bad.ll
+++ b/llvm/test/Verifier/memprof-metadata-bad.ll
@@ -43,7 +43,7 @@ declare dso_local noalias noundef ptr @malloc(i64 noundef)
 !6 = !{i64 0}
 !7 = !{!8}
 ; CHECK: call stack metadata should have at least 1 operand
-; CHECK: Not all !memprof MemInfoBlock operands 1 to N-1 are MDString
+; CHECK: Not all !memprof MemInfoBlock operands 2 to N are MDNode
 !8 = !{!0, !"default", i64 0, i64 5}
 !9 = !{i64 123}
 ; CHECK: call stack metadata operand should be constant integer

>From cb1c09834221bd1e25ad6740a646f86b3fb150d9 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Thu, 31 Oct 2024 14:11:51 -0700
Subject: [PATCH 2/2] clang format

---
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 6d95f68f328baf..11506b8e246ac6 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -8117,8 +8117,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       for (unsigned J = 0; J < NumVersions; J++)
         Versions.push_back(Record[I++]);
       assert(I == Record.size());
-      PendingAllocs.push_back(
-          AllocInfo(std::move(Versions), std::move(MIBs)));
+      PendingAllocs.push_back(AllocInfo(std::move(Versions), std::move(MIBs)));
       break;
     }
     }



More information about the llvm-commits mailing list