[llvm] [MemProf] Use radix tree for alloc contexts in bitcode summaries (PR #117066)

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 20 14:02:22 PST 2024


https://github.com/teresajohnson created https://github.com/llvm/llvm-project/pull/117066

Leverage the support added to represent allocation contexts in a more
compact way via a radix tree in the indexed profile to similarly reduce
sizes of the bitcode summaries.

For a large target, this reduced the size of the per-module summaries by
about 18% and that of the distributed combined index files by 28%.


From 669d377b3370f0cb95c8238efda41aab484168dd Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Wed, 20 Nov 2024 08:47:29 -0800
Subject: [PATCH] [MemProf] Use radix tree for alloc contexts in bitcode
 summaries

Leverage the support added to represent allocation contexts in a more
compact way via a radix tree in the indexed profile to similarly reduce
sizes of the bitcode summaries.

For a large target, this reduced the size of the per-module summaries by
about 18% and that of the distributed combined index files by 28%.
---
 llvm/include/llvm/Bitcode/LLVMBitCodes.h      |  10 +-
 llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp   |   1 +
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp     |  68 ++++++--
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     | 154 ++++++++++++++++--
 llvm/lib/ProfileData/MemProf.cpp              |   5 +
 .../memprof-old-alloc-context-summary.bc      | Bin 0 -> 2272 bytes
 .../X86/memprof-old-alloc-context-summary.ll  |  28 ++++
 7 files changed, 236 insertions(+), 30 deletions(-)
 create mode 100644 llvm/test/ThinLTO/X86/Inputs/memprof-old-alloc-context-summary.bc
 create mode 100644 llvm/test/ThinLTO/X86/memprof-old-alloc-context-summary.ll

diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index a0fb32f67e3858..41909a8fc1d590 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -307,12 +307,12 @@ enum GlobalValueSummarySymtabCodes {
   // [valueid, n x stackidindex]
   FS_PERMODULE_CALLSITE_INFO = 26,
   // Summary of per-module allocation memprof metadata.
-  // [nummib, nummib x (alloc type, numstackids, numstackids x stackidindex),
+  // [nummib, nummib x (alloc type, context radix tree index),
   // [nummib x (numcontext x total size)]?]
   FS_PERMODULE_ALLOC_INFO = 27,
   // Summary of combined index memprof callsite metadata.
-  // [valueid, numstackindices, numver,
-  //  numstackindices x stackidindex, numver x version]
+  // [valueid, context radix tree index, numver,
+  //  numver x version]
   FS_COMBINED_CALLSITE_INFO = 28,
   // Summary of combined index allocation memprof metadata.
   // [nummib, numver,
@@ -331,6 +331,10 @@ enum GlobalValueSummarySymtabCodes {
   // the entries must be in the exact same order as the corresponding sizes.
   // [nummib x (numcontext x full stack id)]
   FS_ALLOC_CONTEXT_IDS = 31,
+  // Linearized radix tree of allocation contexts. See the description above the
+  // CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
+  // [n x entry]
+  FS_CONTEXT_RADIX_TREE_ARRAY = 32,
 };
 
 enum MetadataCodes {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index 8f79ccdb9ff75f..032c0de3c7a00f 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -329,6 +329,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
       STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO)
       STRINGIFY_CODE(FS, STACK_IDS)
       STRINGIFY_CODE(FS, ALLOC_CONTEXT_IDS)
+      STRINGIFY_CODE(FS, CONTEXT_RADIX_TREE_ARRAY)
     }
   case bitc::METADATA_ATTACHMENT_ID:
     switch (CodeID) {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 3e6abacac27261..8472d23816a9a4 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -987,6 +987,10 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
   /// ids from the lists in the callsite and alloc entries to the index.
   std::vector<uint64_t> StackIds;
 
+  /// Linearized radix tree of allocation contexts. See the description above
+  /// the CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
+  std::vector<uint64_t> RadixArray;
+
 public:
   ModuleSummaryIndexBitcodeReader(
       BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex,
@@ -1013,6 +1017,8 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
                                        TypeIdCompatibleVtableInfo &TypeId);
   std::vector<FunctionSummary::ParamAccess>
   parseParamAccesses(ArrayRef<uint64_t> Record);
+  SmallVector<unsigned> parseAllocInfoContext(ArrayRef<uint64_t> Record,
+                                              unsigned &I);
 
   template <bool AllowNullValueInfo = false>
   std::pair<ValueInfo, GlobalValue::GUID>
@@ -7544,6 +7550,45 @@ void ModuleSummaryIndexBitcodeReader::parseTypeIdCompatibleVtableSummaryRecord(
     parseTypeIdCompatibleVtableInfo(Record, Slot, TypeId);
 }
 
+SmallVector<unsigned> ModuleSummaryIndexBitcodeReader::parseAllocInfoContext(
+    ArrayRef<uint64_t> Record, unsigned &I) {
+  SmallVector<unsigned> StackIdList;
+  // For backwards compatibility with old format before radix tree was
+  // used, simply see if we found a radix tree array record.
+  if (RadixArray.empty()) {
+    unsigned NumStackEntries = Record[I++];
+    assert(Record.size() - I >= NumStackEntries);
+    for (unsigned J = 0; J < NumStackEntries; J++) {
+      assert(Record[I] < StackIds.size());
+      StackIdList.push_back(
+          TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]]));
+    }
+  } else {
+    unsigned RadixIndex = Record[I++];
+    // See the comments above CallStackRadixTreeBuilder in ProfileData/MemProf.h
+    // for a detailed description of the radix tree array format. Briefly, the
+    // first entry will be the number of frames, any negative values are the
+    // negative of the offset of the next frame, and otherwise the frames are in
+    // increasing linear order.
+    assert(RadixIndex < RadixArray.size());
+    unsigned NumStackIds = RadixArray[RadixIndex++];
+    while (NumStackIds--) {
+      assert(RadixIndex < RadixArray.size());
+      unsigned Elem = RadixArray[RadixIndex];
+      if (static_cast<std::make_signed_t<unsigned>>(Elem) < 0) {
+        RadixIndex = RadixIndex - Elem;
+        assert(RadixIndex < RadixArray.size());
+        Elem = RadixArray[RadixIndex];
+        // We shouldn't encounter a second offset in a row.
+        assert(static_cast<std::make_signed_t<unsigned>>(Elem) >= 0);
+      }
+      RadixIndex++;
+      StackIdList.push_back(TheIndex.addOrGetStackIdIndex(StackIds[Elem]));
+    }
+  }
+  return StackIdList;
+}
+
 static void setSpecialRefs(SmallVectorImpl<ValueInfo> &Refs, unsigned ROCnt,
                            unsigned WOCnt) {
   // Readonly and writeonly refs are in the end of the refs list.
@@ -8010,6 +8055,11 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       break;
     }
 
+    case bitc::FS_CONTEXT_RADIX_TREE_ARRAY: { // [n x entry]
+      RadixArray = ArrayRef<uint64_t>(Record);
+      break;
+    }
+
     case bitc::FS_PERMODULE_CALLSITE_INFO: {
       unsigned ValueID = Record[0];
       SmallVector<unsigned> StackIdList;
@@ -8065,14 +8115,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
              (Version < 10 && I < Record.size())) {
         assert(Record.size() - I >= 2);
         AllocationType AllocType = (AllocationType)Record[I++];
-        unsigned NumStackEntries = Record[I++];
-        assert(Record.size() - I >= NumStackEntries);
-        SmallVector<unsigned> StackIdList;
-        for (unsigned J = 0; J < NumStackEntries; J++) {
-          assert(Record[I] < StackIds.size());
-          StackIdList.push_back(
-              TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]]));
-        }
+        auto StackIdList = parseAllocInfoContext(Record, I);
         MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
       }
       // We either have nothing left or at least NumMIBs context size info
@@ -8123,14 +8166,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       while (MIBsRead++ < NumMIBs) {
         assert(Record.size() - I >= 2);
         AllocationType AllocType = (AllocationType)Record[I++];
-        unsigned NumStackEntries = Record[I++];
-        assert(Record.size() - I >= NumStackEntries);
-        SmallVector<unsigned> StackIdList;
-        for (unsigned J = 0; J < NumStackEntries; J++) {
-          assert(Record[I] < StackIds.size());
-          StackIdList.push_back(
-              TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]]));
-        }
+        auto StackIdList = parseAllocInfoContext(Record, I);
         MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
       }
       assert(Record.size() - I >= NumVersions);
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 24a4c2e8303d5a..7f6deeec6cd1e8 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -60,6 +60,7 @@
 #include "llvm/MC/StringTableBuilder.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Object/IRSymtab.h"
+#include "llvm/ProfileData/MemProf.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
@@ -83,6 +84,7 @@
 #include <vector>
 
 using namespace llvm;
+using namespace llvm::memprof;
 
 static cl::opt<unsigned>
     IndexThreshold("bitcode-mdindex-threshold", cl::Hidden, cl::init(25),
@@ -231,7 +233,8 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
       SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
       unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
       unsigned CallsiteAbbrev, unsigned AllocAbbrev, unsigned ContextIdAbbvId,
-      const Function &F);
+      const Function &F, DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
+      CallStackId &CallStackCount);
   void writeModuleLevelReferences(const GlobalVariable &V,
                                   SmallVector<uint64_t, 64> &NameVals,
                                   unsigned FSModRefsAbbrev,
@@ -4195,12 +4198,58 @@ static void writeTypeIdCompatibleVtableSummaryRecord(
   }
 }
 
+// Adds the allocation contexts to the CallStacks map. We simply use the
+// size at the time the context was added as the CallStackId. This works because
+// when we look up the call stacks later on we process the function summaries
+// and their allocation records in the same exact order.
+static void collectMemProfCallStacks(
+    FunctionSummary *FS, std::function<LinearFrameId(unsigned)> GetStackIndex,
+    MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &CallStacks) {
+  // The interfaces in ProfileData/MemProf.h use a type alias for a stack frame
+  // id offset into the index of the full stack frames. The ModuleSummaryIndex
+  // currently uses unsigned. Make sure these stay in sync.
+  static_assert(std::is_same_v<LinearFrameId, unsigned>);
+  for (auto &AI : FS->allocs()) {
+    for (auto &MIB : AI.MIBs) {
+      SmallVector<unsigned> StackIdIndices;
+      StackIdIndices.reserve(MIB.StackIdIndices.size());
+      for (auto Id : MIB.StackIdIndices)
+        StackIdIndices.push_back(GetStackIndex(Id));
+      // The CallStackId is the size at the time this context was inserted.
+      CallStacks.insert({CallStacks.size(), StackIdIndices});
+    }
+  }
+}
+
+// Build the radix tree from the accumulated CallStacks, write out the resulting
+// linearized radix tree array, and return the map of call stack positions into
+// this array for use when writing the allocation records. The returned map is
+// indexed by a CallStackId which in this case is implicitly determined by the
+// order of function summaries and their allocation infos being written.
+static DenseMap<CallStackId, LinearCallStackId> writeMemoryProfileRadixTree(
+    MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &CallStacks,
+    BitstreamWriter &Stream, unsigned RadixAbbrev) {
+  assert(!CallStacks.empty());
+  DenseMap<unsigned, FrameStat> FrameHistogram =
+      computeFrameHistogram<LinearFrameId>(CallStacks);
+  CallStackRadixTreeBuilder<LinearFrameId> Builder;
+  // We don't need a MemProfFrameIndexes map as we have already converted the
+  // full stack id hash to a linear offset into the StackIds array.
+  Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/std::nullopt,
+                FrameHistogram);
+  Stream.EmitRecord(bitc::FS_CONTEXT_RADIX_TREE_ARRAY, Builder.getRadixArray(),
+                    RadixAbbrev);
+  return Builder.takeCallStackPos();
+}
+
 static void writeFunctionHeapProfileRecords(
     BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev,
     unsigned AllocAbbrev, unsigned ContextIdAbbvId, bool PerModule,
     std::function<unsigned(const ValueInfo &VI)> GetValueID,
     std::function<unsigned(unsigned)> GetStackIndex,
-    bool WriteContextSizeInfoIndex) {
+    bool WriteContextSizeInfoIndex,
+    DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
+    CallStackId &CallStackCount) {
   SmallVector<uint64_t> Record;
 
   for (auto &CI : FS->callsites()) {
@@ -4234,9 +4283,9 @@ static void writeFunctionHeapProfileRecords(
       Record.push_back(AI.Versions.size());
     for (auto &MIB : AI.MIBs) {
       Record.push_back((uint8_t)MIB.AllocType);
-      Record.push_back(MIB.StackIdIndices.size());
-      for (auto Id : MIB.StackIdIndices)
-        Record.push_back(GetStackIndex(Id));
+      // Record the index into the radix tree array for this context.
+      assert(CallStackCount <= CallStackPos.size());
+      Record.push_back(CallStackPos[CallStackCount++]);
     }
     if (!PerModule) {
       for (auto V : AI.Versions)
@@ -4282,7 +4331,9 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
     SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
     unsigned ValueID, unsigned FSCallsRelBFAbbrev,
     unsigned FSCallsProfileAbbrev, unsigned CallsiteAbbrev,
-    unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F) {
+    unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F,
+    DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
+    CallStackId &CallStackCount) {
   NameVals.push_back(ValueID);
 
   FunctionSummary *FS = cast<FunctionSummary>(Summary);
@@ -4297,7 +4348,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
       /*PerModule*/ true,
       /*GetValueId*/ [&](const ValueInfo &VI) { return getValueId(VI); },
       /*GetStackIndex*/ [&](unsigned I) { return I; },
-      /*WriteContextSizeInfoIndex*/ true);
+      /*WriteContextSizeInfoIndex*/ true, CallStackPos, CallStackCount);
 
   auto SpecialRefCnts = FS->specialRefCounts();
   NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
@@ -4530,12 +4581,52 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
   Abbv = std::make_shared<BitCodeAbbrev>();
   Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
-  // n x (alloc type, numstackids, numstackids x stackidindex)
+  // n x (alloc type, context radix tree index)
   // optional: nummib x (numcontext x total size)
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
   unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
 
+  Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
+  // n x entry
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+  unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+  // First walk through all the functions and collect the allocation contexts in
+  // their associated summaries, for use in constructing a radix tree of
+  // contexts. Note that we need to do this in the same order as the functions
+  // are processed further below since the call stack positions in the resulting
+  // radix tree array are identified based on this order.
+  MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
+  for (const Function &F : M) {
+    // Summary emission does not support anonymous functions, they have to be
+    // renamed using the anonymous function renaming pass.
+    if (!F.hasName())
+      report_fatal_error("Unexpected anonymous function when writing summary");
+
+    ValueInfo VI = Index->getValueInfo(F.getGUID());
+    if (!VI || VI.getSummaryList().empty()) {
+      // Only declarations should not have a summary (a declaration might
+      // however have a summary if the def was in module level asm).
+      assert(F.isDeclaration());
+      continue;
+    }
+    auto *Summary = VI.getSummaryList()[0].get();
+    FunctionSummary *FS = cast<FunctionSummary>(Summary);
+    collectMemProfCallStacks(
+        FS, /*GetStackIndex*/ [&](unsigned I) { return I; }, CallStacks);
+  }
+  // Finalize the radix tree, write it out, and get the map of positions in the
+  // linearized tree array.
+  DenseMap<CallStackId, LinearCallStackId> CallStackPos;
+  if (!CallStacks.empty())
+    CallStackPos = writeMemoryProfileRadixTree(CallStacks, Stream, RadixAbbrev);
+
+  // Keep track of the current index into the CallStackPos map.
+  CallStackId CallStackCount = 0;
+
   SmallVector<uint64_t, 64> NameVals;
   // Iterate over the list of functions instead of the Index to
   // ensure the ordering is stable.
@@ -4555,7 +4646,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
     auto *Summary = VI.getSummaryList()[0].get();
     writePerModuleFunctionSummaryRecord(
         NameVals, Summary, VE.getValueID(&F), FSCallsRelBFAbbrev,
-        FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F);
+        FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F,
+        CallStackPos, CallStackCount);
   }
 
   // Capture references from GlobalVariable initializers, which are outside
@@ -4692,13 +4784,20 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
   Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
-  // nummib x (alloc type, numstackids, numstackids x stackidindex),
+  // nummib x (alloc type, context radix tree index),
   // numver x version
   // optional: nummib x total size
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
   unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
 
+  Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
+  // n x entry
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+  unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
   auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
     if (DecSummaries == nullptr)
       return false;
@@ -4735,6 +4834,39 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
     NameVals.clear();
   };
 
+  // First walk through all the functions and collect the allocation contexts in
+  // their associated summaries, for use in constructing a radix tree of
+  // contexts. Note that we need to do this in the same order as the functions
+  // are processed further below since the call stack positions in the resulting
+  // radix tree array are identified based on this order.
+  MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
+  forEachSummary([&](GVInfo I, bool IsAliasee) {
+    GlobalValueSummary *S = I.second;
+    assert(S);
+    auto *FS = dyn_cast<FunctionSummary>(S);
+    if (!FS)
+      return;
+    collectMemProfCallStacks(
+        FS,
+        /*GetStackIndex*/
+        [&](unsigned I) {
+          // Get the corresponding index into the list of StackIds actually
+          // being written for this combined index (which may be a subset in
+          // the case of distributed indexes).
+          assert(StackIdIndicesToIndex.contains(I));
+          return StackIdIndicesToIndex[I];
+        },
+        CallStacks);
+  });
+  // Finalize the radix tree, write it out, and get the map of positions in the
+  // linearized tree array.
+  DenseMap<CallStackId, LinearCallStackId> CallStackPos;
+  if (!CallStacks.empty())
+    CallStackPos = writeMemoryProfileRadixTree(CallStacks, Stream, RadixAbbrev);
+
+  // Keep track of the current index into the CallStackPos map.
+  CallStackId CallStackCount = 0;
+
   DenseSet<GlobalValue::GUID> DefOrUseGUIDs;
   forEachSummary([&](GVInfo I, bool IsAliasee) {
     GlobalValueSummary *S = I.second;
@@ -4813,7 +4945,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
           assert(StackIdIndicesToIndex.contains(I));
           return StackIdIndicesToIndex[I];
         },
-        /*WriteContextSizeInfoIndex*/ false);
+        /*WriteContextSizeInfoIndex*/ false, CallStackPos, CallStackCount);
 
     NameVals.push_back(*ValueId);
     assert(ModuleIdMap.count(FS->modulePath()));
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 9d5ac748d7975d..12ee3d7a53c4b1 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -615,6 +615,7 @@ void CallStackRadixTreeBuilder<FrameIdTy>::build(
 
 // Explicitly instantiate class with the utilized FrameIdTy.
 template class CallStackRadixTreeBuilder<FrameId>;
+template class CallStackRadixTreeBuilder<LinearFrameId>;
 
 template <typename FrameIdTy>
 llvm::DenseMap<FrameIdTy, FrameStat>
@@ -637,6 +638,10 @@ computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
 template llvm::DenseMap<FrameId, FrameStat> computeFrameHistogram<FrameId>(
     llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
         &MemProfCallStackData);
+template llvm::DenseMap<LinearFrameId, FrameStat>
+computeFrameHistogram<LinearFrameId>(
+    llvm::MapVector<CallStackId, llvm::SmallVector<LinearFrameId>>
+        &MemProfCallStackData);
 
 void verifyIndexedMemProfRecord(const IndexedMemProfRecord &Record) {
   for (const auto &AS : Record.AllocSites) {
diff --git a/llvm/test/ThinLTO/X86/Inputs/memprof-old-alloc-context-summary.bc b/llvm/test/ThinLTO/X86/Inputs/memprof-old-alloc-context-summary.bc
new file mode 100644
index 0000000000000000000000000000000000000000..c98308f4637f8425b00151cbf96acf25e9631f8d
GIT binary patch
literal 2272
zcmXw54NP0t6}}G}?hBY_lhW2+p1t=1iP#d7XG0x3Hk*G)<0*;i{A^-rCv1M4uz)ef
zfPt*S*wb at Tt1=OqSeve9K~<xv8cC6sO4$Z}jJk=JInlI63qy8+{7f{^hDO;Mbvw^A
z*E;9id*8YDp7Wh^?zPQ6z3@^wLWKySCK*?G=%;^r at 6gv%uWU(G))Try#L5s_BoHbo
zU{L|+sDN;&V#Jz~{-{WyGoB+NvTcQ=N+W%vX-KxOz;>?0)}qjr8%X7WVx2DP at XfLn
z^_%&a!g{WtB}HB;XY4OWnNJ9vF4927N7(xK`0#HFR06*Kyh;~~ZBwZC22u!}VGx3&
z(N>RCaoINfiQ)*bcvsTtDBiT)K38bdZ6~Sk6L(3=IGM3b(}Xex+6X<zot~DMo)nrA
zVg38KuL1W3`9UAAckuxqKQPVf4Y==pzVtmO)sdkFfFrrzhkXg$&ENqJ`!hvr4DM6p
z`}xTY1%CqfTJV4i2QozufaixDE-HxdfCBr6a7UI3CUBSQ+l8Rq!Ig9|IG89)F2!a4
zT-Lw#js6b{?fxBU&!0d3;EOwlb}v5op#P8mqOL#Mr5|(@(nQ|hyc5!CLU!@Zraa*J
z34{#jOliSBL4wd_ODW9hG-EPV*}-^h)As#yQu~qrB=2menjd?<#N|8mQ>P!F)IA!s
z5O06|&1dh1f4}?i0hbh^OG<<?u(pOAqGxlor<|yZ+6 at sQW*fr&LEJsoCA*8{pDU>K
zA*x1Z(<EoqH^ylXSD04|Z%Iq)Quex-q%Iwd1$=mr#QhH5=feX&NpFz%F}x~WMy<rD
z94Gg3^2If3%}LcLX4MNUeVbJ$-5L*f#&k7o%Gixl67$utIU+Ho*UEccc<?0NbCUN1
zAL4y+-si%-Rk-&_A#B?>#5a<Bmy-97@%>|=oe$Tjhgs^YX=;U&G^NJrrExl)=ksp*
z8cWZH)Ysfa|A^a-DZ6nj&#wrEQM(~3F^vkl;n1hUroT%}lY;EyHs063`+ecSNgRay
zQ1XEUuexzm{t(H(bW%%kYSALkG1Llp!BnZ|XEaHx<`zrOhcxq9dMT&=H=*$;4;v=!
zd(VeWX=mlPj=1=~AP$ZlKDZv2iwJ0buDRx>@3ZQK8TIRgMl_gJuepnk{W5Gw3#LhX
z9RTt)X<o9MqA(O<e<=n}`983R!~<2>KNfmzQ9-RtQ_D{35mzyD%c at z*y?8sPUH}6e
zD$``RZYsPbA{b(JV_Gm at nK7mW@X?%hOm&eMtm*;)8+g^7Icmiv|2zvRE??&4%Z&W7
zlkzI4MUH~uqioTGEVbmMo^TbPt+?rANPRo^;tH$2%c^HtHAL_}`&|$7j_Ins?ux`X
z8aAdIu0r&|*#WTA!T0%i)%APSDkslnsRtJMg9Mx+5Rz`FLnYrh2B{(<9LPW at LlLVn
zD*$wL5UI3fHle;YuFhID3p2E*vYDfDVtnPW$uehW!#zse?cf^|{OKWBdufy~#{~1J
zV940_LOP`ZF`0vX-saYTsW)cSM?&SEgC{s at m65Mzi&mZTb#W_dIa_p>k>@ht>;+4>
z_ayEodDW5cToFu=hZA-~8ZeX?<6(1JVg at ir1=;@6%s8DSG<hi0b8gM8kmeeJ`k9S;
z(64bY_NYv?$**%T6`|P3$j$nMqN>HL)!E*Rvlo<#7tm&?w|!oBQ+w-CUwd2N;IUVJ
z>}?AmR6*|34m*`lW?ahQ8rC&6e|W09ZLs2Z%{#ia%_XL{w`_xIMd%{%YLF$x{wk;i
zJka191N=qkj2U70E<HD1dfzIRD)_#UV=vS$x~O|jNL~4on85OTPHKH}D1`f~@|6|j
zAuGhZYVe&G3{!+ at B3w5r6r6%0rRQ_>JVP(JMSee|NxEr3AhnSU?ioV94gdn4LsUt0
zBPfM`Ge8J#L!k+f5+=w%xF1pylW6-*1wut2aoRi)Hl2q8l+-1{W+=wE08a{XGYGLD
zpOad5f@=P{n<zs`68c|o2vH!n?oY)J7tcyCEbw*)nmgNDL=xAGF{B{1GI|nUwmQ$2
zCP&&8%I5H7?UrG<B`b;w(XfkS3QBSo7@}WU$mz?p#aeqbLJ-=GvtBK>$llWW;44Do
z3_ at y<7o=$?!Bk3BAzLDTQ;?qBN^kr1>du|Ve)Gfdrw1$_uJWx<xsdG`VT+JutSu5P
z+uSp(P}%CE3brXSK{NGp<e7eJR903|xkRwK$ix&=pCUJ1v;zCcI!k_}BnVVM5ZG6V
z5CgX|LS1mSXrqo&Y>l9I#V&f|Y|YibYQFbrK=#$l>odn+!oLW-x?cV=J!S1ht?)2z
z1D(y1(JB!~ae??&>VOUciE|A=ViTqKZb!udv>yP8iS-j8FaTj7u|4}P_C at _??b&xx
z{tp^rHr9y$7sNSHiu0tn8ML2)AzML$ZT}U=jmK-6-2UFi<6d`%j at t#l6Kw&eRa<M&
z?eTC1ouNfr+v?HoaX0PJ*J$;vhT2+Vixvj0I=yyJ(4cSB*H-s<PkB50yw&Y(-kxCf
t32#qFYlqL@>8XZHsCKuvcQ#iycX|V@!9aC)Pe+H_- at mK9T|6D3{{w_#=C}X=

literal 0
HcmV?d00001

diff --git a/llvm/test/ThinLTO/X86/memprof-old-alloc-context-summary.ll b/llvm/test/ThinLTO/X86/memprof-old-alloc-context-summary.ll
new file mode 100644
index 00000000000000..20f95617915cc5
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-old-alloc-context-summary.ll
@@ -0,0 +1,28 @@
+;; Check that we can read the old *_ALLOC_INFO summary format that placed the
+;; stack id indexes directly in the alloc info summary, rather than encoding as
+;; a separate radix tree.
+;;
+;; The old bitcode was generated by the older compiler from `opt -thinlto-bc`
+;; on the following LLVM assembly:
+;;
+;; target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+;; target triple = "x86_64-unknown-linux-gnu"
+;;
+;; define internal ptr @_Z3barv() #0 {
+;; entry:
+;;   %call = call ptr @_Znam(i64 0), !memprof !1, !callsite !6
+;;   ret ptr null
+;; }
+;;
+;; declare ptr @_Znam(i64)
+;;
+;; !1 = !{!2, !4}
+;; !2 = !{!3, !"notcold"}
+;; !3 = !{i64 9086428284934609951, i64 8632435727821051414}
+;; !4 = !{!5, !"cold"}
+;; !5 = !{i64 9086428284934609951, i64 2732490490862098848}
+;; !6 = !{i64 9086428284934609951}
+
+; RUN: llvm-dis %S/Inputs/memprof-old-alloc-context-summary.bc -o - | FileCheck %s
+; CHECK: stackIds: (8632435727821051414)
+; CHECK-SAME: stackIds: (2732490490862098848)



More information about the llvm-commits mailing list