[llvm] [MemProf] Determine stack id references in BitcodeWriter without sorting (PR #94285)

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 3 15:06:03 PDT 2024


https://github.com/teresajohnson created https://github.com/llvm/llvm-project/pull/94285

A cycle profile of a thin link showed a lot of time spent in sort called
from the BitcodeWriter, which was being used to compute the unique
references to stack ids in the summaries emitted for each backend in a
distributed thinlto build. We were also frequently invoking lower_bound
to locate stack id indices in the resulting vector when writing out the
referencing memprof records.

Change this to use a map to uniquify the references, and to hold the
index of the corresponding stack id in the StackIds vector, which is
now populated at the same time.

This reduced the time of a large thin link by about 10%.

>From eba69aa102319ab63382ece23fe146036d9dc361 Mon Sep 17 00:00:00 2001
From: Teresa Johnson <tejohnson at google.com>
Date: Mon, 3 Jun 2024 14:48:24 -0700
Subject: [PATCH] [MemProf] Determine stack id references in BitcodeWriter
 without sorting

A cycle profile of a thin link showed a lot of time spent in sort called
from the BitcodeWriter, which was being used to compute the unique
references to stack ids in the summaries emitted for each backend in a
distributed thinlto build. We were also frequently invoking lower_bound
to locate stack id indices in the resulting vector when writing out the
referencing memprof records.

Change this to use a map to uniquify the references, and to hold the
index of the corresponding stack id in the StackIds vector, which is
now populated at the same time.

This reduced the time of a large thin link by about 10%.
---
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 52 ++++++++++++++---------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 9016af7d6e05d..a2007a268b5cd 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -437,10 +437,14 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
   /// index and a value id generated by this class to use in references.
   std::map<GlobalValue::GUID, unsigned> GUIDToValueIdMap;
 
-  // The sorted stack id indices actually used in the summary entries being
-  // written, which will be a subset of those in the full index in the case of
-  // distributed indexes.
-  std::vector<unsigned> StackIdIndices;
+  // The stack ids used by this index, which will be a subset of those in
+  // the full index in the case of distributed indexes.
+  std::vector<uint64_t> StackIds;
+
+  // Keep a map of the stack id indices used by records being written for this
+  // index to the index of the corresponding stack id in the above StackIds
+  // vector. Ensures we write each referenced stack id once.
+  DenseMap<unsigned, unsigned> StackIdIndicesToIndex;
 
   /// Tracks the last value id recorded in the GUIDToValueMap.
   unsigned GlobalValueId = 0;
@@ -464,6 +468,19 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
       : BitcodeWriterBase(Stream, StrtabBuilder), Index(Index),
         DecSummaries(DecSummaries),
         ModuleToSummariesForIndex(ModuleToSummariesForIndex) {
+
+    // See if the StackIdIndex was already added to the StackId map and
+    // vector. If not, record it.
+    auto RecordStackIdReference = [&](unsigned StackIdIndex) {
+      // If the StackIdIndex is not yet in the map, the below insert ensures
+      // that it will point to the new StackIds vector entry we push to just
+      // below.
+      auto Inserted =
+          StackIdIndicesToIndex.insert({StackIdIndex, StackIds.size()});
+      if (Inserted.second)
+        StackIds.push_back(Index.getStackIdAtIndex(StackIdIndex));
+    };
+
     // Assign unique value ids to all summaries to be written, for use
     // in writing out the call graph edges. Save the mapping from GUID
     // to the new global value id to use when writing those edges, which
@@ -494,17 +511,13 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
           continue;
         }
         for (auto Idx : CI.StackIdIndices)
-          StackIdIndices.push_back(Idx);
+          RecordStackIdReference(Idx);
       }
       for (auto &AI : FS->allocs())
         for (auto &MIB : AI.MIBs)
           for (auto Idx : MIB.StackIdIndices)
-            StackIdIndices.push_back(Idx);
+            RecordStackIdReference(Idx);
     });
-    llvm::sort(StackIdIndices);
-    StackIdIndices.erase(
-        std::unique(StackIdIndices.begin(), StackIdIndices.end()),
-        StackIdIndices.end());
   }
 
   /// The below iterator returns the GUID and associated summary.
@@ -4492,18 +4505,15 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
                       ArrayRef<uint64_t>{GVI.second, GVI.first});
   }
 
-  if (!StackIdIndices.empty()) {
+  // Write the stack ids used by this index, which will be a subset of those in
+  // the full index in the case of distributed indexes.
+  if (!StackIds.empty()) {
     auto StackIdAbbv = std::make_shared<BitCodeAbbrev>();
     StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS));
     // numids x stackid
     StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
     StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
     unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv));
-    // Write the stack ids used by this index, which will be a subset of those in
-    // the full index in the case of distributed indexes.
-    std::vector<uint64_t> StackIds;
-    for (auto &I : StackIdIndices)
-      StackIds.push_back(Index.getStackIdAtIndex(I));
     Stream.EmitRecord(bitc::FS_STACK_IDS, StackIds, StackIdAbbvId);
   }
 
@@ -4669,11 +4679,11 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
           return *ValueID;
         },
         /*GetStackIndex*/ [&](unsigned I) {
-          // Get the corresponding index into the list of StackIdIndices
-          // actually being written for this combined index (which may be a
-          // subset in the case of distributed indexes).
-          auto Lower = llvm::lower_bound(StackIdIndices, I);
-          return std::distance(StackIdIndices.begin(), Lower);
+          // Get the corresponding index into the list of StackIds actually
+          // being written for this combined index (which may be a subset in
+          // the case of distributed indexes).
+          assert(StackIdIndicesToIndex.contains(I));
+          return StackIdIndicesToIndex[I];
         });
 
     NameVals.push_back(*ValueId);



More information about the llvm-commits mailing list