[clang-tools-extra] [clangd] Drop the optimization where only shards for modified files are updated in the background index (PR #140651)
Nathan Ridge via cfe-commits
cfe-commits at lists.llvm.org
Tue May 20 13:58:57 PDT 2025
https://github.com/HighCommander4 updated https://github.com/llvm/llvm-project/pull/140651
>From 7f48d903c6d5f97a75208c799e0e96a26948af05 Mon Sep 17 00:00:00 2001
From: Nathan Ridge <zeratul976 at hotmail.com>
Date: Sat, 17 May 2025 22:11:45 -0400
Subject: [PATCH 1/2] [clangd] Drop the optimization where only shards for
modified files are updated in the background index
The optimization is not valid, given that the target of a symbol
reference can change without the spelling of the reference (or
anything else in the file containing the reference) changing,
as illustrated in the added test case.
Partially fixes https://github.com/clangd/clangd/issues/1104
---
clang-tools-extra/clangd/index/Background.cpp | 46 ++---------
clang-tools-extra/clangd/index/Background.h | 9 +--
.../clangd/unittests/BackgroundIndexTests.cpp | 76 ++++++++++++++++++-
3 files changed, 83 insertions(+), 48 deletions(-)
diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp
index 496d1455def4b..c4bab3c4d4b20 100644
--- a/clang-tools-extra/clangd/index/Background.cpp
+++ b/clang-tools-extra/clangd/index/Background.cpp
@@ -180,13 +180,10 @@ void BackgroundIndex::boostRelated(llvm::StringRef Path) {
Queue.boost(filenameWithoutExtension(Path), IndexBoostedFile);
}
-/// Given index results from a TU, only update symbols coming from files that
-/// are different or missing from than \p ShardVersionsSnapshot. Also stores new
-/// index information on IndexStorage.
-void BackgroundIndex::update(
- llvm::StringRef MainFile, IndexFileIn Index,
- const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot,
- bool HadErrors) {
+/// Given index results from a TU, update symbols coming from all files. Also
+/// stores new index information on IndexStorage.
+void BackgroundIndex::update(llvm::StringRef MainFile, IndexFileIn Index,
+ bool HadErrors) {
// Keys are URIs.
llvm::StringMap<std::pair<Path, FileDigest>> FilesToUpdate;
// Note that sources do not contain any information regarding missing headers,
@@ -198,12 +195,7 @@ void BackgroundIndex::update(
elog("Failed to resolve URI: {0}", AbsPath.takeError());
continue;
}
- const auto DigestIt = ShardVersionsSnapshot.find(*AbsPath);
- // File has different contents, or indexing was successful this time.
- if (DigestIt == ShardVersionsSnapshot.end() ||
- DigestIt->getValue().Digest != IGN.Digest ||
- (DigestIt->getValue().HadErrors && !HadErrors))
- FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest};
+ FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest};
}
// Shard slabs into files.
@@ -263,13 +255,6 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
return llvm::errorCodeToError(Buf.getError());
auto Hash = digest(Buf->get()->getBuffer());
- // Take a snapshot of the versions to avoid locking for each file in the TU.
- llvm::StringMap<ShardVersion> ShardVersionsSnapshot;
- {
- std::lock_guard<std::mutex> Lock(ShardVersionsMu);
- ShardVersionsSnapshot = ShardVersions;
- }
-
vlog("Indexing {0} (digest:={1})", Cmd.Filename, llvm::toHex(Hash));
ParseInputs Inputs;
Inputs.TFS = &TFS;
@@ -286,25 +271,6 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
return error("Couldn't build compiler instance");
SymbolCollector::Options IndexOpts;
- // Creates a filter to not collect index results from files with unchanged
- // digests.
- IndexOpts.FileFilter = [&ShardVersionsSnapshot](const SourceManager &SM,
- FileID FID) {
- const auto F = SM.getFileEntryRefForID(FID);
- if (!F)
- return false; // Skip invalid files.
- auto AbsPath = getCanonicalPath(*F, SM.getFileManager());
- if (!AbsPath)
- return false; // Skip files without absolute path.
- auto Digest = digestFile(SM, FID);
- if (!Digest)
- return false;
- auto D = ShardVersionsSnapshot.find(*AbsPath);
- if (D != ShardVersionsSnapshot.end() && D->second.Digest == Digest &&
- !D->second.HadErrors)
- return false; // Skip files that haven't changed, without errors.
- return true;
- };
IndexOpts.CollectMainFileRefs = true;
IndexFileIn Index;
@@ -345,7 +311,7 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
for (auto &It : *Index.Sources)
It.second.Flags |= IncludeGraphNode::SourceFlag::HadErrors;
}
- update(AbsolutePath, std::move(Index), ShardVersionsSnapshot, HadErrors);
+ update(AbsolutePath, std::move(Index), HadErrors);
Rebuilder.indexedTU();
return llvm::Error::success();
diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h
index 448e911201575..87a81666e07b7 100644
--- a/clang-tools-extra/clangd/index/Background.h
+++ b/clang-tools-extra/clangd/index/Background.h
@@ -190,12 +190,9 @@ class BackgroundIndex : public SwapIndex {
bool HadErrors = false;
};
- /// Given index results from a TU, only update symbols coming from files with
- /// different digests than \p ShardVersionsSnapshot. Also stores new index
- /// information on IndexStorage.
- void update(llvm::StringRef MainFile, IndexFileIn Index,
- const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot,
- bool HadErrors);
+ /// Given index results from a TU, update symbols coming from all files. Also
+ /// stores new index information on IndexStorage.
+ void update(llvm::StringRef MainFile, IndexFileIn Index, bool HadErrors);
// configuration
const ThreadsafeFS &TFS;
diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
index ada14c9939318..4c64b514cef0c 100644
--- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
+++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
@@ -14,6 +14,7 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <deque>
+#include <optional>
#include <thread>
using ::testing::_;
@@ -213,10 +214,11 @@ TEST_F(BackgroundIndexTest, IndexTwoFiles) {
CDB.setCompileCommand(testPath("root/B.cc"), Cmd);
ASSERT_TRUE(Idx.blockUntilIdleForTest());
- // B_CC is dropped as we don't collect symbols from A.h in this compilation.
+ // A_CC is dropped as A.h was now most recently indexed in the context of
+ // B.cc.
EXPECT_THAT(runFuzzyFind(Idx, ""),
UnorderedElementsAre(AllOf(named("common"), numReferences(5U)),
- AllOf(named("A_CC"), numReferences(0U)),
+ AllOf(named("B_CC"), numReferences(0U)),
AllOf(named("g"), numReferences(1U)),
AllOf(named("f_b"), declared(), defined(),
numReferences(1U))));
@@ -682,6 +684,76 @@ TEST_F(BackgroundIndexTest, Reindex) {
EXPECT_EQ(OldShard, Storage.lookup(testPath("A.cc")));
}
+// Test that restarting clangd properly updates the index with changes
+// to files since the last time the index was built.
+TEST_F(BackgroundIndexTest, UpdateAfterRestart) {
+ MockFS FS;
+ llvm::StringMap<std::string> Storage;
+ size_t CacheHits = 0;
+ MemoryShardStorage MSS(Storage, CacheHits);
+ auto CDB = std::make_unique<OverlayCDB>(/*Base=*/nullptr);
+ auto Idx = std::make_unique<BackgroundIndex>(
+ FS, *CDB, [&](llvm::StringRef) { return &MSS; },
+ BackgroundIndex::Options{});
+
+ // Create a header file containing a function declaration, and a source file
+ // containing a call to the function.
+ FS.Files[testPath("test.h")] = R"cpp(
+ #ifndef TEST_H
+ #define TEST_H
+ void waldo(int);
+ #endif
+ )cpp";
+ FS.Files[testPath("test.cc")] = R"cpp(
+ #include "test.h"
+ int main() {
+ waldo(42);
+ }
+ )cpp";
+
+ // Index the files in this state.
+ tooling::CompileCommand Cmd;
+ Cmd.Filename = "../test.cc";
+ Cmd.Directory = testPath("build");
+ Cmd.CommandLine = {"clang++", "../test.cc", "-fsyntax-only"};
+ CDB->setCompileCommand(testPath("test.cc"), Cmd);
+ ASSERT_TRUE(Idx->blockUntilIdleForTest());
+
+ // Verify that the function 'waldo' has two references in the index
+ // (the declaration, and the call site).
+ auto CheckRefCount = [&](std::string SymbolName) {
+ auto Syms = runFuzzyFind(*Idx, SymbolName);
+ EXPECT_THAT(Syms, UnorderedElementsAre(named(SymbolName)));
+ auto Sym = *Syms.begin();
+ return getRefs(*Idx, Sym.ID).numRefs();
+ };
+ EXPECT_EQ(CheckRefCount("waldo"), 2u);
+
+ // Modify the declaration of 'waldo' in a way that changes its SymbolID
+ // without changing how existing call sites are written. Here, we add
+ // a new parameter with a default argument.
+ FS.Files[testPath("test.h")] = R"cpp(
+ #ifndef TEST_H
+ #define TEST_H
+ void waldo(int, int = 0);
+ #endif
+ )cpp";
+
+ // Simulate clangd shutting down and restarting, and the background index
+ // being rebuilt after restart.
+ Idx = nullptr;
+ CDB = std::make_unique<OverlayCDB>(/*Base=*/nullptr);
+ Idx = std::make_unique<BackgroundIndex>(
+ FS, *CDB, [&](llvm::StringRef) { return &MSS; },
+ BackgroundIndex::Options{});
+ CDB->setCompileCommand(testPath("test.cc"), Cmd);
+ ASSERT_TRUE(Idx->blockUntilIdleForTest());
+
+ // The rebuild should have updated things so that 'waldo' now again has
+ // two references in the index.
+ EXPECT_EQ(CheckRefCount("waldo"), 2u);
+}
+
class BackgroundIndexRebuilderTest : public testing::Test {
protected:
BackgroundIndexRebuilderTest()
>From 71c335076d13e3793275adf4ebcbba7b4117a9c7 Mon Sep 17 00:00:00 2001
From: Nathan Ridge <zeratul976 at hotmail.com>
Date: Tue, 20 May 2025 16:58:21 -0400
Subject: [PATCH 2/2] Add a second testcase illustrating a remaining problem
Out of multiple source files including a header, only one
is re-indexed when the header is modified.
---
.../clangd/unittests/BackgroundIndexTests.cpp | 82 +++++++++++++++++++
1 file changed, 82 insertions(+)
diff --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
index 4c64b514cef0c..b435c2814dc5a 100644
--- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
+++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
@@ -754,6 +754,88 @@ TEST_F(BackgroundIndexTest, UpdateAfterRestart) {
EXPECT_EQ(CheckRefCount("waldo"), 2u);
}
+// Test that updating a header that's included by multiple source files
+// correctly updates all including source files, even if the source files
+// themselves did not change.
+TEST_F(BackgroundIndexTest, HeaderWithMultipleIncluders) {
+ MockFS FS;
+ llvm::StringMap<std::string> Storage;
+ size_t CacheHits = 0;
+ MemoryShardStorage MSS(Storage, CacheHits);
+ auto CDB = std::make_unique<OverlayCDB>(/*Base=*/nullptr);
+ auto Idx = std::make_unique<BackgroundIndex>(
+ FS, *CDB, [&](llvm::StringRef) { return &MSS; },
+ BackgroundIndex::Options{});
+
+ // Create a header file containing a function declaration, and two source
+ // files each containing a call to the function.
+ FS.Files[testPath("header.h")] = R"cpp(
+ #ifndef TEST_H
+ #define TEST_H
+ void waldo(int);
+ #endif
+ )cpp";
+ FS.Files[testPath("a.cc")] = R"cpp(
+ #include "header.h"
+ void f() {
+ waldo(41);
+ }
+ )cpp";
+ FS.Files[testPath("b.cc")] = R"cpp(
+ #include "header.h"
+ void f() {
+ waldo(42);
+ }
+ )cpp";
+
+ // Index the files in this state.
+ tooling::CompileCommand Cmd1;
+ Cmd1.Filename = "../a.cc";
+ Cmd1.Directory = testPath("build");
+ Cmd1.CommandLine = {"clang++", "../a.cc", "-fsyntax-only"};
+ tooling::CompileCommand Cmd2;
+ Cmd2.Filename = "../b.cc";
+ Cmd2.Directory = testPath("build");
+ Cmd2.CommandLine = {"clang++", "../b.cc", "-fsyntax-only"};
+ CDB->setCompileCommand(testPath("b.cc"), Cmd2);
+ ASSERT_TRUE(Idx->blockUntilIdleForTest());
+
+ // Verify that the function 'waldo' has three references in the index
+ // (the declaration, and the two call sites).
+ auto CheckRefCount = [&](std::string SymbolName) {
+ auto Syms = runFuzzyFind(*Idx, SymbolName);
+ EXPECT_THAT(Syms, UnorderedElementsAre(named(SymbolName)));
+ auto Sym = *Syms.begin();
+ return getRefs(*Idx, Sym.ID).numRefs();
+ };
+ EXPECT_EQ(CheckRefCount("waldo"), 3u);
+
+ // Modify the declaration of 'waldo' in a way that changes its SymbolID
+ // without changing how existing call sites are written. Here, we add
+ // a new parameter with a default argument.
+ FS.Files[testPath("test.h")] = R"cpp(
+ #ifndef TEST_H
+ #define TEST_H
+ void waldo(int, int = 0);
+ #endif
+ )cpp";
+
+ // Simulate clangd shutting down and restarting, and the background index
+ // being rebuilt after restart.
+ Idx = nullptr;
+ CDB = std::make_unique<OverlayCDB>(/*Base=*/nullptr);
+ Idx = std::make_unique<BackgroundIndex>(
+ FS, *CDB, [&](llvm::StringRef) { return &MSS; },
+ BackgroundIndex::Options{});
+ CDB->setCompileCommand(testPath("a.cc"), Cmd1);
+ CDB->setCompileCommand(testPath("b.cc"), Cmd2);
+ ASSERT_TRUE(Idx->blockUntilIdleForTest());
+
+ // The rebuild should have updated things so that 'waldo' now again has
+ // three references in the index.
+ EXPECT_EQ(CheckRefCount("waldo"), 3u);
+}
+
class BackgroundIndexRebuilderTest : public testing::Test {
protected:
BackgroundIndexRebuilderTest()
More information about the cfe-commits
mailing list