[clang-tools-extra] Improve loading speed of indexes used by clangd (PR #156185)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Aug 30 07:33:19 PDT 2025
https://github.com/JVApen created https://github.com/llvm/llvm-project/pull/156185
Fixes clangd/clangd#2122
Use an alternative builder implementation in cases were no duplicate entries are expected. In practice, this is when loading a previously created index as the deduplicaton happened while making that one. By using a simple std::vector over a DenseSet, we don't have the overhead of keeping all entries sorted.
For our use-case, this reduces the total loading time from 1 minute 15 seconds to 1 minute and reduces the CPU seconds from 50 to 30.
>From 14eed6410c1183f67974c62cf9ca0674e0a477af Mon Sep 17 00:00:00 2001
From: JVApen <vanantwerpenjeroen at gmail.com>
Date: Sat, 30 Aug 2025 16:29:54 +0200
Subject: [PATCH] Fixes clangd/clangd#2122
Use an alternative builder implementation in cases were no duplicate entries are expected.
In practice, this is when loading a previously created index as the deduplicaton happened while making that one.
By using a simple std::vector over a DenseSet, we don't have the overhead of keeping all entries sorted.
For our use-case, this reduces the total loading time from 1 minute 15 seconds to 1 minute and reduces the CPU seconds from 50 to 30.
---
clang-tools-extra/clangd/index/FileIndex.cpp | 2 +-
clang-tools-extra/clangd/index/Ref.cpp | 27 +++++++++++++++
clang-tools-extra/clangd/index/Ref.h | 33 +++++++++++++++++++
.../clangd/index/Serialization.cpp | 2 +-
.../clangd/index/YAMLSerialization.cpp | 2 +-
5 files changed, 63 insertions(+), 3 deletions(-)
diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp
index c49de377d54ca..8fdcfce06656b 100644
--- a/clang-tools-extra/clangd/index/FileIndex.cpp
+++ b/clang-tools-extra/clangd/index/FileIndex.cpp
@@ -204,7 +204,7 @@ FileShardedIndex::getShard(llvm::StringRef Uri) const {
SymB.insert(*S);
IF.Symbols = std::move(SymB).build();
- RefSlab::Builder RefB;
+ RefSlab::BuilderExpectUnique RefB;
for (const auto *Ref : It->getValue().Refs) {
auto SID = RefToSymID.lookup(Ref);
RefB.insert(SID, *Ref);
diff --git a/clang-tools-extra/clangd/index/Ref.cpp b/clang-tools-extra/clangd/index/Ref.cpp
index e622a3f78e42e..5640199e45175 100644
--- a/clang-tools-extra/clangd/index/Ref.cpp
+++ b/clang-tools-extra/clangd/index/Ref.cpp
@@ -64,5 +64,32 @@ RefSlab RefSlab::Builder::build() && {
return RefSlab(std::move(Result), std::move(Arena), Entries.size());
}
+void RefSlab::BuilderExpectUnique::insert(const SymbolID &ID, const Ref &S) {
+ Entry E = {ID, S};
+ E.Reference.Location.FileURI = UniqueStrings.save(S.Location.FileURI).data();
+ Entries.emplace_back(std::move(E));
+}
+
+RefSlab RefSlab::BuilderExpectUnique::build() && {
+ std::vector<std::pair<SymbolID, llvm::ArrayRef<Ref>>> Result;
+ // We'll reuse the arena, as it only has unique strings and we need them all.
+ // We need to group refs by symbol and form contiguous arrays on the arena.
+ // Group by SymbolID.
+ std::sort(Entries.begin(), Entries.end());
+ Entries.erase(std::unique(Entries.begin(), Entries.end()), Entries.end());
+ std::vector<Ref> Refs;
+ // Loop over symbols, copying refs for each onto the arena.
+ for (auto I = Entries.begin(), End = Entries.end(); I != End;) {
+ SymbolID Sym = I->Symbol;
+ Refs.clear();
+ do {
+ Refs.push_back(I->Reference);
+ ++I;
+ } while (I != End && I->Symbol == Sym);
+ Result.emplace_back(Sym, llvm::ArrayRef<Ref>(Refs).copy(Arena));
+ }
+ return RefSlab(std::move(Result), std::move(Arena), Entries.size());
+}
+
} // namespace clangd
} // namespace clang
diff --git a/clang-tools-extra/clangd/index/Ref.h b/clang-tools-extra/clangd/index/Ref.h
index 870f77f56e6cb..9a7e7aadba9f5 100644
--- a/clang-tools-extra/clangd/index/Ref.h
+++ b/clang-tools-extra/clangd/index/Ref.h
@@ -132,6 +132,8 @@ class RefSlab {
}
/// RefSlab::Builder is a mutable container that can 'freeze' to RefSlab.
+ /// This variant is optimized to receive duplicate symbols.
+ /// Use this when parsing files
class Builder {
public:
Builder() : UniqueStrings(Arena) {}
@@ -154,6 +156,37 @@ class RefSlab {
llvm::DenseSet<Entry> Entries;
};
+ /// RefSlab::Builder is a mutable container that can 'freeze' to RefSlab.
+ /// This variant is optimized to receive unique symbols
+ /// Use this when reading symbols which where previously written in a file.
+ class BuilderExpectUnique {
+ public:
+ BuilderExpectUnique() : UniqueStrings(Arena) {}
+ /// Adds a ref to the slab. Deep copy: Strings will be owned by the slab.
+ void insert(const SymbolID &ID, const Ref &S);
+ /// Consumes the builder to finalize the slab.
+ RefSlab build() &&;
+ private:
+ // A ref we're storing with its symbol to consume with build().
+ // All strings are interned, so DenseMapInfo can use pointer comparisons.
+ struct Entry {
+ SymbolID Symbol;
+ Ref Reference;
+ friend bool operator<(const Entry &L, const Entry &R) noexcept {
+ return std::tie(L.Symbol, L.Reference) <
+ std::tie(R.Symbol, R.Reference);
+ }
+ friend bool operator==(const Entry &L, const Entry &R) noexcept {
+ return std::tie(L.Symbol, L.Reference) ==
+ std::tie(R.Symbol, R.Reference);
+ }
+ };
+ friend struct llvm::DenseMapInfo<Entry>;
+ llvm::BumpPtrAllocator Arena;
+ llvm::UniqueStringSaver UniqueStrings; // Contents on the arena.
+ std::vector<Entry> Entries;
+ };
+
private:
RefSlab(std::vector<value_type> Refs, llvm::BumpPtrAllocator Arena,
size_t NumRefs)
diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp
index f03839599612c..d21e16c7a33dd 100644
--- a/clang-tools-extra/clangd/index/Serialization.cpp
+++ b/clang-tools-extra/clangd/index/Serialization.cpp
@@ -516,7 +516,7 @@ llvm::Expected<IndexFileIn> readRIFF(llvm::StringRef Data,
}
if (Chunks.count("refs")) {
Reader RefsReader(Chunks.lookup("refs"));
- RefSlab::Builder Refs;
+ RefSlab::BuilderExpectUnique Refs;
while (!RefsReader.eof()) {
auto RefsBundle = readRefs(RefsReader, Strings->Strings);
for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert?
diff --git a/clang-tools-extra/clangd/index/YAMLSerialization.cpp b/clang-tools-extra/clangd/index/YAMLSerialization.cpp
index 495d8a2ff487a..eb90f167c1b51 100644
--- a/clang-tools-extra/clangd/index/YAMLSerialization.cpp
+++ b/clang-tools-extra/clangd/index/YAMLSerialization.cpp
@@ -473,7 +473,7 @@ void writeYAML(const IndexFileOut &O, llvm::raw_ostream &OS) {
llvm::Expected<IndexFileIn> readYAML(llvm::StringRef Data,
SymbolOrigin Origin) {
SymbolSlab::Builder Symbols;
- RefSlab::Builder Refs;
+ RefSlab::BuilderExpectUnique Refs;
RelationSlab::Builder Relations;
llvm::BumpPtrAllocator
Arena; // store the underlying data of Position::FileURI.
More information about the cfe-commits
mailing list