[clang-tools-extra] [clangd] Add background index format support to clangd-indexer (PR #175209)
Jason Williams via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 26 09:00:42 PST 2026
https://github.com/jasonwilliams updated https://github.com/llvm/llvm-project/pull/175209
>From 9869a5bc33271c741ee35703fd984f00c9d9af79 Mon Sep 17 00:00:00 2001
From: jasonwilliams <jase.williams at gmail.com>
Date: Fri, 9 Jan 2026 17:23:51 +0000
Subject: [PATCH 1/6] [clangd] Add background index format support to clangd-indexer
Add support for generating background index shards directly from clangd-indexer, enabling offline pre-indexing of projects for use with clangd's background index.
---
.../clangd/index/Serialization.h | 5 +-
.../clangd/indexer/IndexerMain.cpp | 205 ++++++++++++++++--
2 files changed, 191 insertions(+), 19 deletions(-)
diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h
index bf8e036afcb6c..1553e702a5881 100644
--- a/clang-tools-extra/clangd/index/Serialization.h
+++ b/clang-tools-extra/clangd/index/Serialization.h
@@ -35,8 +35,9 @@ namespace clang {
namespace clangd {
enum class IndexFileFormat {
- RIFF, // Versioned binary format, suitable for production use.
- YAML, // Human-readable format, suitable for experiments and debugging.
+ RIFF, // Versioned binary format, suitable for production use.
+ YAML, // Human-readable format, suitable for experiments and debugging.
+ BACKGROUND // Background index format, suitable for language server use.
};
// Holds the contents of an index file that was read.
diff --git a/clang-tools-extra/clangd/indexer/IndexerMain.cpp b/clang-tools-extra/clangd/indexer/IndexerMain.cpp
index bc5d1a7408991..5c9e540fee0ba 100644
--- a/clang-tools-extra/clangd/indexer/IndexerMain.cpp
+++ b/clang-tools-extra/clangd/indexer/IndexerMain.cpp
@@ -12,6 +12,8 @@
#include "CompileCommands.h"
#include "Compiler.h"
+#include "GlobalCompilationDatabase.h"
+#include "index/Background.h"
#include "index/IndexAction.h"
#include "index/Merge.h"
#include "index/Ref.h"
@@ -30,13 +32,14 @@ namespace clang {
namespace clangd {
namespace {
-static llvm::cl::opt<IndexFileFormat>
- Format("format", llvm::cl::desc("Format of the index to be written"),
- llvm::cl::values(clEnumValN(IndexFileFormat::YAML, "yaml",
- "human-readable YAML format"),
- clEnumValN(IndexFileFormat::RIFF, "binary",
- "binary RIFF format")),
- llvm::cl::init(IndexFileFormat::RIFF));
+static llvm::cl::opt<IndexFileFormat> Format(
+ "format", llvm::cl::desc("Format of the index to be written"),
+ llvm::cl::values(
+ clEnumValN(IndexFileFormat::YAML, "yaml", "human-readable YAML format"),
+ clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format"),
+ clEnumValN(IndexFileFormat::BACKGROUND, "background",
+ "background index format for language servers")),
+ llvm::cl::init(IndexFileFormat::RIFF));
static llvm::cl::list<std::string> QueryDriverGlobs{
"query-driver",
@@ -48,6 +51,16 @@ static llvm::cl::list<std::string> QueryDriverGlobs{
llvm::cl::CommaSeparated,
};
+static llvm::cl::opt<std::string> ProjectRoot{
+ "project-root",
+ llvm::cl::desc(
+ "Path to the project root for --format=background. "
+ "Determines where to store index shards. Shards are stored in "
+ "<project-root>/.cache/clangd/index/. "
+ "Defaults to current directory if not specified."),
+};
+
+// Action factory that merges all symbols into a single index (for YAML/RIFF).
class IndexActionFactory : public tooling::FrontendActionFactory {
public:
IndexActionFactory(IndexFileIn &Result) : Result(Result) {}
@@ -123,6 +136,117 @@ class IndexActionFactory : public tooling::FrontendActionFactory {
RelationSlab::Builder Relations;
};
+// Action factory that writes per-file shards (for background index format).
+class BackgroundIndexActionFactory : public tooling::FrontendActionFactory {
+public:
+ BackgroundIndexActionFactory(BackgroundIndexStorage &Storage)
+ : Storage(Storage), Symbols(std::make_unique<SymbolSlab::Builder>()),
+ Refs(std::make_unique<RefSlab::Builder>()),
+ Relations(std::make_unique<RelationSlab::Builder>()) {}
+
+ std::unique_ptr<FrontendAction> create() override {
+ SymbolCollector::Options Opts;
+ Opts.CountReferences = true;
+ Opts.FileFilter = [&](const SourceManager &SM, FileID FID) {
+ const auto F = SM.getFileEntryRefForID(FID);
+ if (!F)
+ return false;
+ auto AbsPath = getCanonicalPath(*F, SM.getFileManager());
+ if (!AbsPath)
+ return false;
+ std::lock_guard<std::mutex> Lock(FilesMu);
+ return Files.insert(*AbsPath).second;
+ };
+ return createStaticIndexingAction(
+ Opts,
+ [&](SymbolSlab S) {
+ std::lock_guard<std::mutex> Lock(SymbolsMu);
+ for (const auto &Sym : S) {
+ if (const auto *Existing = Symbols->find(Sym.ID))
+ Symbols->insert(mergeSymbol(*Existing, Sym));
+ else
+ Symbols->insert(Sym);
+ }
+ },
+ [&](RefSlab S) {
+ std::lock_guard<std::mutex> Lock(RefsMu);
+ for (const auto &Sym : S) {
+ for (const auto &Ref : Sym.second)
+ Refs->insert(Sym.first, Ref);
+ }
+ },
+ [&](RelationSlab S) {
+ std::lock_guard<std::mutex> Lock(RelsMu);
+ for (const auto &R : S)
+ Relations->insert(R);
+ },
+ /*IncludeGraphCallback=*/nullptr);
+ }
+
+ bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
+ FileManager *Files,
+ std::shared_ptr<PCHContainerOperations> PCHContainerOps,
+ DiagnosticConsumer *DiagConsumer) override {
+ disableUnsupportedOptions(*Invocation);
+
+ // Get the main file path before running.
+ std::string MainFile;
+ if (!Invocation->getFrontendOpts().Inputs.empty())
+ MainFile = Invocation->getFrontendOpts().Inputs[0].getFile().str();
+
+ bool Success = tooling::FrontendActionFactory::runInvocation(
+ std::move(Invocation), Files, std::move(PCHContainerOps), DiagConsumer);
+
+ // After processing, write a shard for this file.
+ if (Success && !MainFile.empty())
+ writeShardForFile(MainFile);
+
+ return Success;
+ }
+
+private:
+ void writeShardForFile(llvm::StringRef MainFile) {
+ IndexFileIn Data;
+ {
+ std::lock_guard<std::mutex> Lock(SymbolsMu);
+ Data.Symbols = std::move(*Symbols).build();
+ Symbols = std::make_unique<SymbolSlab::Builder>();
+ }
+ {
+ std::lock_guard<std::mutex> Lock(RefsMu);
+ Data.Refs = std::move(*Refs).build();
+ Refs = std::make_unique<RefSlab::Builder>();
+ }
+ {
+ std::lock_guard<std::mutex> Lock(RelsMu);
+ Data.Relations = std::move(*Relations).build();
+ Relations = std::make_unique<RelationSlab::Builder>();
+ }
+
+ IndexFileOut Out(Data);
+ Out.Format = IndexFileFormat::RIFF; // Shards use RIFF format.
+
+ if (auto Err = Storage.storeShard(MainFile, Out)) {
+ elog("Failed to write shard for {0}: {1}", MainFile, std::move(Err));
+ } else {
+ std::lock_guard<std::mutex> Lock(FilesMu);
+ ++ShardsWritten;
+ log("Wrote shard for {0} ({1} total)", MainFile, ShardsWritten);
+ }
+ }
+
+ BackgroundIndexStorage &Storage;
+ std::mutex FilesMu;
+ llvm::StringSet<> Files;
+ unsigned ShardsWritten = 0;
+ std::mutex SymbolsMu;
+ std::unique_ptr<SymbolSlab::Builder> Symbols;
+ std::mutex RefsMu;
+ std::unique_ptr<RefSlab::Builder> Refs;
+ std::mutex RelsMu;
+ std::unique_ptr<RelationSlab::Builder> Relations;
+};
+
} // namespace
} // namespace clangd
} // namespace clang
@@ -141,6 +265,13 @@ int main(int argc, const char **argv) {
$ clangd-indexer File1.cpp File2.cpp ... FileN.cpp > clangd.dex
+ Example usage for background index format (writes shards to disk):
+
+ $ clangd-indexer --format=background --executor=all-TUs build/
+
+ This writes index shards to .cache/clangd/index/ in the current directory.
+ Use --project-root to specify a different location for the shards.
+
Note: only symbols from header files will be indexed.
)";
@@ -152,23 +283,63 @@ int main(int argc, const char **argv) {
return 1;
}
- // Collect symbols found in each translation unit, merging as we go.
- clang::clangd::IndexFileIn Data;
auto Mangler = std::make_shared<clang::clangd::CommandMangler>(
clang::clangd::CommandMangler::detect());
Mangler->SystemIncludeExtractor = clang::clangd::getSystemIncludeExtractor(
static_cast<llvm::ArrayRef<std::string>>(
clang::clangd::QueryDriverGlobs));
+
+ auto Adjuster = clang::tooling::ArgumentsAdjuster(
+ [Mangler = std::move(Mangler)](const std::vector<std::string> &Args,
+ llvm::StringRef File) {
+ clang::tooling::CompileCommand Cmd;
+ Cmd.CommandLine = Args;
+ Mangler->operator()(Cmd, File);
+ return Cmd.CommandLine;
+ });
+
+ // Handle background index format separately - writes per-file shards.
+ if (clang::clangd::Format == clang::clangd::IndexFileFormat::BACKGROUND) {
+ // Default to current directory if --project-root not specified.
+ std::string Root = clang::clangd::ProjectRoot;
+ if (Root.empty()) {
+ llvm::SmallString<256> CurrentDir;
+ if (auto EC = llvm::sys::fs::current_path(CurrentDir)) {
+ llvm::errs() << "Error: Failed to get current directory: "
+ << EC.message() << "\n";
+ return 1;
+ }
+ Root = std::string(CurrentDir);
+ }
+
+ // Create storage factory for disk-backed index shards.
+ auto IndexStorageFactory =
+ clang::clangd::BackgroundIndexStorage::createDiskBackedStorageFactory(
+ [Root](clang::clangd::PathRef) {
+ return clang::clangd::ProjectInfo{Root};
+ });
+
+ // Get storage for the project root.
+ clang::clangd::BackgroundIndexStorage *Storage = IndexStorageFactory(Root);
+
+ auto Err = Executor->get()->execute(
+ std::make_unique<clang::clangd::BackgroundIndexActionFactory>(*Storage),
+ std::move(Adjuster));
+ if (Err) {
+ clang::clangd::elog("{0}", std::move(Err));
+ return 1;
+ }
+
+ llvm::errs() << "Background index shards written to " << Root
+ << "/.cache/clangd/index/\n";
+ return 0;
+ }
+
+ // Standard mode: collect and merge symbols, then emit to stdout.
+ clang::clangd::IndexFileIn Data;
auto Err = Executor->get()->execute(
std::make_unique<clang::clangd::IndexActionFactory>(Data),
- clang::tooling::ArgumentsAdjuster(
- [Mangler = std::move(Mangler)](const std::vector<std::string> &Args,
- llvm::StringRef File) {
- clang::tooling::CompileCommand Cmd;
- Cmd.CommandLine = Args;
- Mangler->operator()(Cmd, File);
- return Cmd.CommandLine;
- }));
+ std::move(Adjuster));
if (Err) {
clang::clangd::elog("{0}", std::move(Err));
}
>From 6b761f2fa859c353ff32932ea055d7931f39a468 Mon Sep 17 00:00:00 2001
From: jasonwilliams <jase.williams at gmail.com>
Date: Sun, 11 Jan 2026 15:01:40 +0000
Subject: [PATCH 2/6] handle background case for serialization
---
clang-tools-extra/clangd/index/Serialization.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp
index f03839599612c..10388b1948f43 100644
--- a/clang-tools-extra/clangd/index/Serialization.cpp
+++ b/clang-tools-extra/clangd/index/Serialization.cpp
@@ -686,6 +686,8 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
case IndexFileFormat::YAML:
writeYAML(O, OS);
break;
+ case IndexFileFormat::BACKGROUND:
+ llvm_unreachable("BACKGROUND format not supported for serialization");
}
return OS;
}
>From 17b72a6156af495fc2ee8ed4babbc2e40c778c7d Mon Sep 17 00:00:00 2001
From: jasonwilliams <jase.williams at gmail.com>
Date: Fri, 23 Jan 2026 10:43:13 +0000
Subject: [PATCH 3/6] change name to sharded instead of background
---
.../clangd/index/Serialization.cpp | 4 ++--
.../clangd/index/Serialization.h | 6 ++---
.../clangd/indexer/IndexerMain.cpp | 22 +++++++++----------
3 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp
index 10388b1948f43..fbb672f184dd2 100644
--- a/clang-tools-extra/clangd/index/Serialization.cpp
+++ b/clang-tools-extra/clangd/index/Serialization.cpp
@@ -686,8 +686,8 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
case IndexFileFormat::YAML:
writeYAML(O, OS);
break;
- case IndexFileFormat::BACKGROUND:
- llvm_unreachable("BACKGROUND format not supported for serialization");
+ case IndexFileFormat::SHARDED:
+ llvm_unreachable("SHARDED format not supported for serialization");
}
return OS;
}
diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h
index 1553e702a5881..d0939e051ecaf 100644
--- a/clang-tools-extra/clangd/index/Serialization.h
+++ b/clang-tools-extra/clangd/index/Serialization.h
@@ -35,9 +35,9 @@ namespace clang {
namespace clangd {
enum class IndexFileFormat {
- RIFF, // Versioned binary format, suitable for production use.
- YAML, // Human-readable format, suitable for experiments and debugging.
- BACKGROUND // Background index format, suitable for language server use.
+ RIFF, // Versioned binary format, suitable for production use.
+ YAML, // Human-readable format, suitable for experiments and debugging.
+ SHARDED // Sharded index format, suitable for language server use.
};
// Holds the contents of an index file that was read.
diff --git a/clang-tools-extra/clangd/indexer/IndexerMain.cpp b/clang-tools-extra/clangd/indexer/IndexerMain.cpp
index 5c9e540fee0ba..52732eea18568 100644
--- a/clang-tools-extra/clangd/indexer/IndexerMain.cpp
+++ b/clang-tools-extra/clangd/indexer/IndexerMain.cpp
@@ -37,8 +37,8 @@ static llvm::cl::opt<IndexFileFormat> Format(
llvm::cl::values(
clEnumValN(IndexFileFormat::YAML, "yaml", "human-readable YAML format"),
clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format"),
- clEnumValN(IndexFileFormat::BACKGROUND, "background",
- "background index format for language servers")),
+ clEnumValN(IndexFileFormat::SHARDED, "sharded",
+ "Sharded index format for language servers")),
llvm::cl::init(IndexFileFormat::RIFF));
static llvm::cl::list<std::string> QueryDriverGlobs{
@@ -54,7 +54,7 @@ static llvm::cl::list<std::string> QueryDriverGlobs{
static llvm::cl::opt<std::string> ProjectRoot{
"project-root",
llvm::cl::desc(
- "Path to the project root for --format=background. "
+ "Path to the project root for --format=sharded. "
"Determines where to store index shards. Shards are stored in "
"<project-root>/.cache/clangd/index/. "
"Defaults to current directory if not specified."),
@@ -136,10 +136,10 @@ class IndexActionFactory : public tooling::FrontendActionFactory {
RelationSlab::Builder Relations;
};
-// Action factory that writes per-file shards (for background index format).
-class BackgroundIndexActionFactory : public tooling::FrontendActionFactory {
+// Action factory that writes per-file shards (for sharded index format).
+class ShardedIndexActionFactory : public tooling::FrontendActionFactory {
public:
- BackgroundIndexActionFactory(BackgroundIndexStorage &Storage)
+ ShardedIndexActionFactory(BackgroundIndexStorage &Storage)
: Storage(Storage), Symbols(std::make_unique<SymbolSlab::Builder>()),
Refs(std::make_unique<RefSlab::Builder>()),
Relations(std::make_unique<RelationSlab::Builder>()) {}
@@ -265,9 +265,9 @@ int main(int argc, const char **argv) {
$ clangd-indexer File1.cpp File2.cpp ... FileN.cpp > clangd.dex
- Example usage for background index format (writes shards to disk):
+ Example usage for sharded index format (writes shards to disk):
- $ clangd-indexer --format=background --executor=all-TUs build/
+ $ clangd-indexer --format=sharded --executor=all-TUs build/
This writes index shards to .cache/clangd/index/ in the current directory.
Use --project-root to specify a different location for the shards.
@@ -298,8 +298,8 @@ int main(int argc, const char **argv) {
return Cmd.CommandLine;
});
- // Handle background index format separately - writes per-file shards.
- if (clang::clangd::Format == clang::clangd::IndexFileFormat::BACKGROUND) {
+ // Handle sharded index format separately - writes per-file shards.
+ if (clang::clangd::Format == clang::clangd::IndexFileFormat::SHARDED) {
// Default to current directory if --project-root not specified.
std::string Root = clang::clangd::ProjectRoot;
if (Root.empty()) {
@@ -323,7 +323,7 @@ int main(int argc, const char **argv) {
clang::clangd::BackgroundIndexStorage *Storage = IndexStorageFactory(Root);
auto Err = Executor->get()->execute(
- std::make_unique<clang::clangd::BackgroundIndexActionFactory>(*Storage),
+ std::make_unique<clang::clangd::ShardedIndexActionFactory>(*Storage),
std::move(Adjuster));
if (Err) {
clang::clangd::elog("{0}", std::move(Err));
>From 24c58ffc9d6eb7faceaa45271047091de63afd2a Mon Sep 17 00:00:00 2001
From: jasonwilliams <jase.williams at gmail.com>
Date: Fri, 23 Jan 2026 10:50:59 +0000
Subject: [PATCH 4/6] create base class for both IndexActionFactory and ShardedIndexActionFactory
---
.../clangd/indexer/IndexerMain.cpp | 102 ++++++------------
1 file changed, 32 insertions(+), 70 deletions(-)
diff --git a/clang-tools-extra/clangd/indexer/IndexerMain.cpp b/clang-tools-extra/clangd/indexer/IndexerMain.cpp
index 52732eea18568..395357d6545a3 100644
--- a/clang-tools-extra/clangd/indexer/IndexerMain.cpp
+++ b/clang-tools-extra/clangd/indexer/IndexerMain.cpp
@@ -60,10 +60,13 @@ static llvm::cl::opt<std::string> ProjectRoot{
"Defaults to current directory if not specified."),
};
-// Action factory that merges all symbols into a single index (for YAML/RIFF).
-class IndexActionFactory : public tooling::FrontendActionFactory {
+// Base class for index action factories that provides common symbol collection.
+class IndexActionFactoryBase : public tooling::FrontendActionFactory {
public:
- IndexActionFactory(IndexFileIn &Result) : Result(Result) {}
+ IndexActionFactoryBase()
+ : Symbols(std::make_unique<SymbolSlab::Builder>()),
+ Refs(std::make_unique<RefSlab::Builder>()),
+ Relations(std::make_unique<RelationSlab::Builder>()) {}
std::unique_ptr<FrontendAction> create() override {
SymbolCollector::Options Opts;
@@ -84,10 +87,10 @@ class IndexActionFactory : public tooling::FrontendActionFactory {
// Merge as we go.
std::lock_guard<std::mutex> Lock(SymbolsMu);
for (const auto &Sym : S) {
- if (const auto *Existing = Symbols.find(Sym.ID))
- Symbols.insert(mergeSymbol(*Existing, Sym));
+ if (const auto *Existing = Symbols->find(Sym.ID))
+ Symbols->insert(mergeSymbol(*Existing, Sym));
else
- Symbols.insert(Sym);
+ Symbols->insert(Sym);
}
},
[&](RefSlab S) {
@@ -95,13 +98,13 @@ class IndexActionFactory : public tooling::FrontendActionFactory {
for (const auto &Sym : S) {
// Deduplication happens during insertion.
for (const auto &Ref : Sym.second)
- Refs.insert(Sym.first, Ref);
+ Refs->insert(Sym.first, Ref);
}
},
[&](RelationSlab S) {
std::lock_guard<std::mutex> Lock(RelsMu);
for (const auto &R : S) {
- Relations.insert(R);
+ Relations->insert(R);
}
},
/*IncludeGraphCallback=*/nullptr);
@@ -116,72 +119,39 @@ class IndexActionFactory : public tooling::FrontendActionFactory {
std::move(Invocation), Files, std::move(PCHContainerOps), DiagConsumer);
}
+protected:
+ std::mutex FilesMu;
+ llvm::StringSet<> Files;
+ std::mutex SymbolsMu;
+ std::unique_ptr<SymbolSlab::Builder> Symbols;
+ std::mutex RefsMu;
+ std::unique_ptr<RefSlab::Builder> Refs;
+ std::mutex RelsMu;
+ std::unique_ptr<RelationSlab::Builder> Relations;
+};
+
+// Action factory that merges all symbols into a single index (for YAML/RIFF).
+class IndexActionFactory : public IndexActionFactoryBase {
+public:
+ IndexActionFactory(IndexFileIn &Result) : Result(Result) {}
+
// Awkward: we write the result in the destructor, because the executor
// takes ownership so it's the easiest way to get our data back out.
~IndexActionFactory() {
- Result.Symbols = std::move(Symbols).build();
- Result.Refs = std::move(Refs).build();
- Result.Relations = std::move(Relations).build();
+ Result.Symbols = std::move(*Symbols).build();
+ Result.Refs = std::move(*Refs).build();
+ Result.Relations = std::move(*Relations).build();
}
private:
IndexFileIn &Result;
- std::mutex FilesMu;
- llvm::StringSet<> Files;
- std::mutex SymbolsMu;
- SymbolSlab::Builder Symbols;
- std::mutex RefsMu;
- RefSlab::Builder Refs;
- std::mutex RelsMu;
- RelationSlab::Builder Relations;
};
// Action factory that writes per-file shards (for sharded index format).
-class ShardedIndexActionFactory : public tooling::FrontendActionFactory {
+class ShardedIndexActionFactory : public IndexActionFactoryBase {
public:
ShardedIndexActionFactory(BackgroundIndexStorage &Storage)
- : Storage(Storage), Symbols(std::make_unique<SymbolSlab::Builder>()),
- Refs(std::make_unique<RefSlab::Builder>()),
- Relations(std::make_unique<RelationSlab::Builder>()) {}
-
- std::unique_ptr<FrontendAction> create() override {
- SymbolCollector::Options Opts;
- Opts.CountReferences = true;
- Opts.FileFilter = [&](const SourceManager &SM, FileID FID) {
- const auto F = SM.getFileEntryRefForID(FID);
- if (!F)
- return false;
- auto AbsPath = getCanonicalPath(*F, SM.getFileManager());
- if (!AbsPath)
- return false;
- std::lock_guard<std::mutex> Lock(FilesMu);
- return Files.insert(*AbsPath).second;
- };
- return createStaticIndexingAction(
- Opts,
- [&](SymbolSlab S) {
- std::lock_guard<std::mutex> Lock(SymbolsMu);
- for (const auto &Sym : S) {
- if (const auto *Existing = Symbols->find(Sym.ID))
- Symbols->insert(mergeSymbol(*Existing, Sym));
- else
- Symbols->insert(Sym);
- }
- },
- [&](RefSlab S) {
- std::lock_guard<std::mutex> Lock(RefsMu);
- for (const auto &Sym : S) {
- for (const auto &Ref : Sym.second)
- Refs->insert(Sym.first, Ref);
- }
- },
- [&](RelationSlab S) {
- std::lock_guard<std::mutex> Lock(RelsMu);
- for (const auto &R : S)
- Relations->insert(R);
- },
- /*IncludeGraphCallback=*/nullptr);
- }
+ : Storage(Storage) {}
bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
FileManager *Files,
@@ -236,15 +206,7 @@ class ShardedIndexActionFactory : public tooling::FrontendActionFactory {
}
BackgroundIndexStorage &Storage;
- std::mutex FilesMu;
- llvm::StringSet<> Files;
unsigned ShardsWritten = 0;
- std::mutex SymbolsMu;
- std::unique_ptr<SymbolSlab::Builder> Symbols;
- std::mutex RefsMu;
- std::unique_ptr<RefSlab::Builder> Refs;
- std::mutex RelsMu;
- std::unique_ptr<RelationSlab::Builder> Relations;
};
} // namespace
>From ef5d7ee38cc0d678a33d2417f1188db1e57d4c26 Mon Sep 17 00:00:00 2001
From: jasonwilliams <jase.williams at gmail.com>
Date: Mon, 26 Jan 2026 11:32:44 +0000
Subject: [PATCH 5/6] Change sharded from format to index-type instead
---
.../clangd/index/Serialization.cpp | 2 -
.../clangd/index/Serialization.h | 5 +--
.../clangd/indexer/IndexerMain.cpp | 37 ++++++++++++-------
3 files changed, 26 insertions(+), 18 deletions(-)
diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp
index fbb672f184dd2..f03839599612c 100644
--- a/clang-tools-extra/clangd/index/Serialization.cpp
+++ b/clang-tools-extra/clangd/index/Serialization.cpp
@@ -686,8 +686,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O) {
case IndexFileFormat::YAML:
writeYAML(O, OS);
break;
- case IndexFileFormat::SHARDED:
- llvm_unreachable("SHARDED format not supported for serialization");
}
return OS;
}
diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h
index d0939e051ecaf..bf8e036afcb6c 100644
--- a/clang-tools-extra/clangd/index/Serialization.h
+++ b/clang-tools-extra/clangd/index/Serialization.h
@@ -35,9 +35,8 @@ namespace clang {
namespace clangd {
enum class IndexFileFormat {
- RIFF, // Versioned binary format, suitable for production use.
- YAML, // Human-readable format, suitable for experiments and debugging.
- SHARDED // Sharded index format, suitable for language server use.
+ RIFF, // Versioned binary format, suitable for production use.
+ YAML, // Human-readable format, suitable for experiments and debugging.
};
// Holds the contents of an index file that was read.
diff --git a/clang-tools-extra/clangd/indexer/IndexerMain.cpp b/clang-tools-extra/clangd/indexer/IndexerMain.cpp
index 395357d6545a3..1494497a1c35b 100644
--- a/clang-tools-extra/clangd/indexer/IndexerMain.cpp
+++ b/clang-tools-extra/clangd/indexer/IndexerMain.cpp
@@ -30,16 +30,27 @@
namespace clang {
namespace clangd {
+
+enum class IndexOutputMode { Monolithic, Sharded };
+
namespace {
-static llvm::cl::opt<IndexFileFormat> Format(
- "format", llvm::cl::desc("Format of the index to be written"),
+static llvm::cl::opt<IndexFileFormat>
+ Format("format", llvm::cl::desc("Format of the index to be written"),
+ llvm::cl::values(clEnumValN(IndexFileFormat::YAML, "yaml",
+ "human-readable YAML format"),
+ clEnumValN(IndexFileFormat::RIFF, "binary",
+ "binary RIFF format")),
+ llvm::cl::init(IndexFileFormat::RIFF));
+
+static llvm::cl::opt<IndexOutputMode> OutputMode(
+ "index-type", llvm::cl::desc("Type of index output"),
llvm::cl::values(
- clEnumValN(IndexFileFormat::YAML, "yaml", "human-readable YAML format"),
- clEnumValN(IndexFileFormat::RIFF, "binary", "binary RIFF format"),
- clEnumValN(IndexFileFormat::SHARDED, "sharded",
- "Sharded index format for language servers")),
- llvm::cl::init(IndexFileFormat::RIFF));
+ clEnumValN(IndexOutputMode::Monolithic, "monolithic",
+ "Single merged index file written to stdout (default)"),
+ clEnumValN(IndexOutputMode::Sharded, "sharded",
+ "Per-file shards written to disk")),
+ llvm::cl::init(IndexOutputMode::Monolithic));
static llvm::cl::list<std::string> QueryDriverGlobs{
"query-driver",
@@ -54,7 +65,7 @@ static llvm::cl::list<std::string> QueryDriverGlobs{
static llvm::cl::opt<std::string> ProjectRoot{
"project-root",
llvm::cl::desc(
- "Path to the project root for --format=sharded. "
+ "Path to the project root for --index-type=sharded. "
"Determines where to store index shards. Shards are stored in "
"<project-root>/.cache/clangd/index/. "
"Defaults to current directory if not specified."),
@@ -227,9 +238,9 @@ int main(int argc, const char **argv) {
$ clangd-indexer File1.cpp File2.cpp ... FileN.cpp > clangd.dex
- Example usage for sharded index format (writes shards to disk):
+ Example usage for sharded index (writes shards to disk):
- $ clangd-indexer --format=sharded --executor=all-TUs build/
+ $ clangd-indexer --index-type=sharded --executor=all-TUs build/
This writes index shards to .cache/clangd/index/ in the current directory.
Use --project-root to specify a different location for the shards.
@@ -260,8 +271,8 @@ int main(int argc, const char **argv) {
return Cmd.CommandLine;
});
- // Handle sharded index format separately - writes per-file shards.
- if (clang::clangd::Format == clang::clangd::IndexFileFormat::SHARDED) {
+ // Handle sharded index type separately - writes per-file shards.
+ if (clang::clangd::OutputMode == clang::clangd::IndexOutputMode::Sharded) {
// Default to current directory if --project-root not specified.
std::string Root = clang::clangd::ProjectRoot;
if (Root.empty()) {
@@ -292,7 +303,7 @@ int main(int argc, const char **argv) {
return 1;
}
- llvm::errs() << "Background index shards written to " << Root
+ llvm::errs() << "Index shards written to " << Root
<< "/.cache/clangd/index/\n";
return 0;
}
>From c478d5fbcddbfbea27c1518fdb6c79aaf18039b1 Mon Sep 17 00:00:00 2001
From: jasonwilliams <jase.williams at gmail.com>
Date: Mon, 26 Jan 2026 17:00:25 +0000
Subject: [PATCH 6/6] Match how clangd's background index works
---
.../clangd/indexer/IndexerMain.cpp | 107 ++++++++++++++++--
1 file changed, 96 insertions(+), 11 deletions(-)
diff --git a/clang-tools-extra/clangd/indexer/IndexerMain.cpp b/clang-tools-extra/clangd/indexer/IndexerMain.cpp
index 1494497a1c35b..eafe4dd33e07a 100644
--- a/clang-tools-extra/clangd/indexer/IndexerMain.cpp
+++ b/clang-tools-extra/clangd/indexer/IndexerMain.cpp
@@ -14,6 +14,7 @@
#include "Compiler.h"
#include "GlobalCompilationDatabase.h"
#include "index/Background.h"
+#include "index/FileIndex.h"
#include "index/IndexAction.h"
#include "index/Merge.h"
#include "index/Ref.h"
@@ -21,6 +22,7 @@
#include "index/Symbol.h"
#include "index/SymbolCollector.h"
#include "support/Logger.h"
+#include "URI.h"
#include "clang/Tooling/ArgumentsAdjusters.h"
#include "clang/Tooling/Execution.h"
#include "clang/Tooling/Tooling.h"
@@ -164,6 +166,54 @@ class ShardedIndexActionFactory : public IndexActionFactoryBase {
ShardedIndexActionFactory(BackgroundIndexStorage &Storage)
: Storage(Storage) {}
+ std::unique_ptr<FrontendAction> create() override {
+ SymbolCollector::Options Opts;
+ Opts.CountReferences = true;
+ Opts.FileFilter = [&](const SourceManager &SM, FileID FID) {
+ const auto F = SM.getFileEntryRefForID(FID);
+ if (!F)
+ return false; // Skip invalid files.
+ auto AbsPath = getCanonicalPath(*F, SM.getFileManager());
+ if (!AbsPath)
+ return false; // Skip files without absolute path.
+ std::lock_guard<std::mutex> Lock(FilesMu);
+ return Files.insert(*AbsPath).second; // Skip already processed files.
+ };
+ return createStaticIndexingAction(
+ Opts,
+ [&](SymbolSlab S) {
+ // Merge as we go.
+ std::lock_guard<std::mutex> Lock(SymbolsMu);
+ for (const auto &Sym : S) {
+ if (const auto *Existing = Symbols->find(Sym.ID))
+ Symbols->insert(mergeSymbol(*Existing, Sym));
+ else
+ Symbols->insert(Sym);
+ }
+ },
+ [&](RefSlab S) {
+ std::lock_guard<std::mutex> Lock(RefsMu);
+ for (const auto &Sym : S) {
+ // Deduplication happens during insertion.
+ for (const auto &Ref : Sym.second)
+ Refs->insert(Sym.first, Ref);
+ }
+ },
+ [&](RelationSlab S) {
+ std::lock_guard<std::mutex> Lock(RelsMu);
+ for (const auto &R : S) {
+ Relations->insert(R);
+ }
+ },
+ [&](IncludeGraph IG) {
+ std::lock_guard<std::mutex> Lock(SourcesMu);
+ for (auto &Entry : IG) {
+ // Merge include graphs from different TUs.
+ Sources.try_emplace(Entry.first(), Entry.second);
+ }
+ });
+ }
+
bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
FileManager *Files,
std::shared_ptr<PCHContainerOperations> PCHContainerOps,
@@ -178,15 +228,16 @@ class ShardedIndexActionFactory : public IndexActionFactoryBase {
bool Success = tooling::FrontendActionFactory::runInvocation(
std::move(Invocation), Files, std::move(PCHContainerOps), DiagConsumer);
- // After processing, write a shard for this file.
+ // After processing, write shards for all files in this TU.
if (Success && !MainFile.empty())
- writeShardForFile(MainFile);
+ writeShardsForTU(MainFile);
return Success;
}
private:
- void writeShardForFile(llvm::StringRef MainFile) {
+ void writeShardsForTU(llvm::StringRef MainFile) {
+ // Build the complete index data for this TU.
IndexFileIn Data;
{
std::lock_guard<std::mutex> Lock(SymbolsMu);
@@ -203,20 +254,54 @@ class ShardedIndexActionFactory : public IndexActionFactoryBase {
Data.Relations = std::move(*Relations).build();
Relations = std::make_unique<RelationSlab::Builder>();
}
+ {
+ std::lock_guard<std::mutex> Lock(SourcesMu);
+ Data.Sources = std::move(Sources);
+ Sources.clear();
+ }
- IndexFileOut Out(Data);
- Out.Format = IndexFileFormat::RIFF; // Shards use RIFF format.
+ // Shard the index data per-file.
+ FileShardedIndex ShardedIndex(std::move(Data));
- if (auto Err = Storage.storeShard(MainFile, Out)) {
- elog("Failed to write shard for {0}: {1}", MainFile, std::move(Err));
- } else {
- std::lock_guard<std::mutex> Lock(FilesMu);
- ++ShardsWritten;
- log("Wrote shard for {0} ({1} total)", MainFile, ShardsWritten);
+ // Write a shard for each file.
+ unsigned TUShardsWritten = 0;
+ for (llvm::StringRef Uri : ShardedIndex.getAllSources()) {
+ auto Shard = ShardedIndex.getShard(Uri);
+ if (!Shard) {
+ elog("Failed to get shard for {0}", Uri);
+ continue;
+ }
+
+ // Resolve URI to absolute path.
+ auto AbsPath = URI::resolve(Uri, MainFile);
+ if (!AbsPath) {
+ elog("Failed to resolve URI {0}: {1}", Uri, AbsPath.takeError());
+ continue;
+ }
+
+ // Only store command line for the main file.
+ if (*AbsPath != MainFile)
+ Shard->Cmd.reset();
+
+ IndexFileOut Out(*Shard);
+ Out.Format = IndexFileFormat::RIFF; // Shards use RIFF format.
+
+ if (auto Err = Storage.storeShard(*AbsPath, Out)) {
+ elog("Failed to write shard for {0}: {1}", *AbsPath, std::move(Err));
+ } else {
+ ++TUShardsWritten;
+ }
}
+
+ std::lock_guard<std::mutex> Lock(FilesMu);
+ ShardsWritten += TUShardsWritten;
+ log("Wrote {0} shards for TU {1} ({2} total)", TUShardsWritten, MainFile,
+ ShardsWritten);
}
BackgroundIndexStorage &Storage;
+ std::mutex SourcesMu;
+ IncludeGraph Sources;
unsigned ShardsWritten = 0;
};
More information about the cfe-commits mailing list