[llvm] [IR] Optimize CFI in `writeCombinedGlobalValueSummary` (PR #130382)

Vitaly Buka via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 7 19:00:19 PST 2025


https://github.com/vitalybuka created https://github.com/llvm/llvm-project/pull/130382

Before the patch,
`writeCombinedGlobalValueSummary` traversed entire
`cfiFunction*` for each module, just to pick a few
symbols from `DefOrUseGUIDs`.

Now we change internals of `cfiFunctionDefs` and
`cfiFunctionDecls` to maintain a map from GUID to StringSet.

So now we iterate `DefOrUseGUIDs`, usually small,
and pick exact subset of symbols.

Sorting is not strictly necessary, but it
preserves the order of emitted values.


>From e5af6e037f685db2c1aba1527dbaa8489d5737db Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Fri, 7 Mar 2025 19:00:09 -0800
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20initia?=
 =?UTF-8?q?l=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 llvm/include/llvm/IR/ModuleSummaryIndex.h | 106 ++++++++++++++++++----
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp |  27 ++++--
 2 files changed, 103 insertions(+), 30 deletions(-)

diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 2183675d84e84..36575cf76cbbf 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -1291,37 +1291,86 @@ struct TypeIdSummary {
 };
 
 class CfiFunctionIndex {
-  std::set<std::string, std::less<>> Index;
+  DenseMap<GlobalValue::GUID, std::set<std::string, std::less<>>> Index;
+  using IndexIterator =
+      DenseMap<GlobalValue::GUID,
+               std::set<std::string, std::less<>>>::const_iterator;
+  using NestedIterator = std::set<std::string, std::less<>>::const_iterator;
 
 public:
-  class GUIDIterator
-      : public iterator_adaptor_base<
-            GUIDIterator, std::set<std::string, std::less<>>::const_iterator,
-            std::forward_iterator_tag, GlobalValue::GUID> {
-    using base = iterator_adaptor_base<
-        GUIDIterator, std::set<std::string, std::less<>>::const_iterator,
-        std::forward_iterator_tag, GlobalValue::GUID>;
+  // Iterates keys of the DenseMap.
+  class GUIDIterator : public iterator_adaptor_base<GUIDIterator, IndexIterator,
+                                                    std::forward_iterator_tag,
+                                                    GlobalValue::GUID> {
+    using base = GUIDIterator::iterator_adaptor_base;
 
   public:
     GUIDIterator() = default;
-    explicit GUIDIterator(std::set<std::string, std::less<>>::const_iterator I)
-        : base(std::move(I)) {}
+    explicit GUIDIterator(IndexIterator I) : base(I) {}
 
-    GlobalValue::GUID operator*() const {
-      return GlobalValue::getGUID(
-          GlobalValue::dropLLVMManglingEscape(*this->wrapped()));
+    GlobalValue::GUID operator*() const { return this->wrapped()->first; }
+  };
+
+  // Iterates merged values of the DenseMap.
+  class SymbolIterator
+      : public iterator_facade_base<SymbolIterator, std::forward_iterator_tag,
+                                    std::string> {
+    using base = SymbolIterator::iterator_facade_base;
+
+    IndexIterator Outer;
+    IndexIterator OuterEnd;
+    NestedIterator Inner;
+
+  public:
+    SymbolIterator() = default;
+    SymbolIterator(IndexIterator Outer, IndexIterator OuterEnd)
+        : Outer(Outer), OuterEnd(OuterEnd),
+          Inner(Outer != OuterEnd ? Outer->second.begin() : NestedIterator{}) {}
+    SymbolIterator(SymbolIterator &R) = default;
+
+    const std::string &operator*() const { return *Inner; }
+
+    SymbolIterator &operator++() {
+      // `DenseMap` never contains empty sets. So:
+      // 1. `Outer` points to non-empty set, if `Outer` != `OuterEnd`.
+      // 2. `Inner` always is valid and dereferenceable, if `Outer` !=
+      // `OuterEnd`.
+      assert(Outer != OuterEnd);
+      assert(!Outer->second.empty());
+      ++Inner;
+      if (Inner == Outer->second.end()) {
+        ++Outer;
+        Inner = Outer != OuterEnd ? Outer->second.begin() : NestedIterator{};
+      }
+      return *this;
+    }
+
+    SymbolIterator &operator=(const SymbolIterator &R) {
+      if (this != &R) {
+        Outer = R.Outer;
+        OuterEnd = R.OuterEnd;
+        Inner = R.Inner;
+      }
+      return *this;
+    }
+
+    bool operator==(const SymbolIterator &R) const {
+      return Outer == R.Outer && Inner == R.Inner;
     }
   };
 
   CfiFunctionIndex() = default;
-  template <typename It> CfiFunctionIndex(It B, It E) : Index(B, E) {}
+  template <typename It> CfiFunctionIndex(It B, It E) {
+    for (; B != E; ++B)
+      emplace(*B);
+  }
 
-  std::set<std::string, std::less<>>::const_iterator begin() const {
-    return Index.begin();
+  SymbolIterator begin() const {
+    return SymbolIterator(Index.begin(), Index.end());
   }
 
-  std::set<std::string, std::less<>>::const_iterator end() const {
-    return Index.end();
+  SymbolIterator end() const {
+    return SymbolIterator(Index.end(), Index.end());
   }
 
   GUIDIterator guid_begin() const { return GUIDIterator(Index.begin()); }
@@ -1330,11 +1379,28 @@ class CfiFunctionIndex {
     return make_range(guid_begin(), guid_end());
   }
 
+  iterator_range<NestedIterator> forGuid(GlobalValue::GUID GUID) const {
+    auto I = Index.find(GUID);
+    if (I == Index.end())
+      return make_range(NestedIterator{}, NestedIterator{});
+    return make_range(I->second.begin(), I->second.end());
+  }
+
   template <typename... Args> void emplace(Args &&...A) {
-    Index.emplace(std::forward<Args>(A)...);
+    StringRef S(std::forward<Args>(A)...);
+    GlobalValue::GUID GUID =
+        GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(S));
+    Index[GUID].emplace(S);
   }
 
-  size_t count(StringRef S) const { return Index.count(S); }
+  size_t count(StringRef S) const {
+    GlobalValue::GUID GUID =
+        GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(S));
+    auto I = Index.find(GUID);
+    if (I == Index.end())
+      return 0;
+    return I->second.count(S);
+  }
 };
 
 /// 160 bits SHA1
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index bddc2cd2180b1..a7f72c7588899 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -5064,28 +5064,35 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
       getReferencedTypeIds(FS, ReferencedTypeIds);
   }
 
-  for (auto &S : Index.cfiFunctionDefs()) {
-    if (DefOrUseGUIDs.contains(
-            GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(S)))) {
+  SmallVector<StringRef, 4> Functions;
+  for (GlobalValue::GUID GUID : DefOrUseGUIDs) {
+    auto Defs = Index.cfiFunctionDefs().forGuid(GUID);
+    Functions.insert(Functions.end(), Defs.begin(), Defs.end());
+  }
+  if (!Functions.empty()) {
+    std::sort(Functions.begin(), Functions.end());
+    for (const auto &S : Functions) {
       NameVals.push_back(StrtabBuilder.add(S));
       NameVals.push_back(S.size());
     }
-  }
-  if (!NameVals.empty()) {
     Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DEFS, NameVals);
     NameVals.clear();
+    Functions.clear();
   }
 
-  for (auto &S : Index.cfiFunctionDecls()) {
-    if (DefOrUseGUIDs.contains(
-            GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(S)))) {
+  for (GlobalValue::GUID GUID : DefOrUseGUIDs) {
+    auto Decls = Index.cfiFunctionDecls().forGuid(GUID);
+    Functions.insert(Functions.end(), Decls.begin(), Decls.end());
+  }
+  if (!Functions.empty()) {
+    std::sort(Functions.begin(), Functions.end());
+    for (const auto &S : Functions) {
       NameVals.push_back(StrtabBuilder.add(S));
       NameVals.push_back(S.size());
     }
-  }
-  if (!NameVals.empty()) {
     Stream.EmitRecord(bitc::FS_CFI_FUNCTION_DECLS, NameVals);
     NameVals.clear();
+    Functions.clear();
   }
 
   // Walk the GUIDs that were referenced, and write the



More information about the llvm-commits mailing list