[llvm-branch-commits] [clang-tools-extra] 66d5994 - [clangd] Explicitly avoid background-indexing the same file twice.

Sam McCall via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jan 13 08:35:03 PST 2021


Author: Sam McCall
Date: 2021-01-13T17:29:30+01:00
New Revision: 66d5994bd38a9be4a0c05de2b69f88b64e6845ce

URL: https://github.com/llvm/llvm-project/commit/66d5994bd38a9be4a0c05de2b69f88b64e6845ce
DIFF: https://github.com/llvm/llvm-project/commit/66d5994bd38a9be4a0c05de2b69f88b64e6845ce.diff

LOG: [clangd] Explicitly avoid background-indexing the same file twice.

This used to implicitly never happen due to only discovering each CDB
once.

We may want to carefully support reindexing one day, but we need to do
it carefully (tricky tradeoffs) and it would need further support in
background indexer.

Making this explicit here rather than just turning off rebroadcast in
background index for a few reasons:
- allows *new* files in the same CDB to be indexed
- relying on bugs-at-a-distance cancelling each other out is bound to bite us
- gets us closer to actually supporting reindexing, which requires similar tracking

Differential Revision: https://reviews.llvm.org/D94503

Added: 
    

Modified: 
    clang-tools-extra/clangd/index/Background.cpp
    clang-tools-extra/clangd/index/Background.h
    clang-tools-extra/clangd/index/BackgroundQueue.cpp
    clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp
index 1649bffea2ed..e4ce1f57ff2f 100644
--- a/clang-tools-extra/clangd/index/Background.cpp
+++ b/clang-tools-extra/clangd/index/Background.cpp
@@ -43,6 +43,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Threading.h"
+#include "llvm/Support/xxhash.h"
 
 #include <algorithm>
 #include <atomic>
@@ -139,8 +140,8 @@ BackgroundQueue::Task BackgroundIndex::changedFilesTask(
                  std::mt19937(std::random_device{}()));
     std::vector<BackgroundQueue::Task> Tasks;
     Tasks.reserve(NeedsReIndexing.size());
-    for (auto &Cmd : NeedsReIndexing)
-      Tasks.push_back(indexFileTask(std::move(Cmd)));
+    for (const auto &File : NeedsReIndexing)
+      Tasks.push_back(indexFileTask(std::move(File)));
     Queue.append(std::move(Tasks));
   });
 
@@ -156,6 +157,7 @@ static llvm::StringRef filenameWithoutExtension(llvm::StringRef Path) {
 
 BackgroundQueue::Task BackgroundIndex::indexFileTask(std::string Path) {
   std::string Tag = filenameWithoutExtension(Path).str();
+  uint64_t Key = llvm::xxHash64(Path);
   BackgroundQueue::Task T([this, Path(std::move(Path))] {
     llvm::Optional<WithContext> WithProvidedContext;
     if (ContextProvider)
@@ -168,6 +170,7 @@ BackgroundQueue::Task BackgroundIndex::indexFileTask(std::string Path) {
   });
   T.QueuePri = IndexFile;
   T.Tag = std::move(Tag);
+  T.Key = Key;
   return T;
 }
 

diff  --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h
index e8f9468889f2..fbcec7014957 100644
--- a/clang-tools-extra/clangd/index/Background.h
+++ b/clang-tools-extra/clangd/index/Background.h
@@ -76,6 +76,8 @@ class BackgroundQueue {
     llvm::ThreadPriority ThreadPri = llvm::ThreadPriority::Background;
     unsigned QueuePri = 0; // Higher-priority tasks will run first.
     std::string Tag;       // Allows priority to be boosted later.
+    uint64_t Key = 0;      // If the key matches a previous task, drop this one.
+                           // (in practice this means we never reindex a file).
 
     bool operator<(const Task &O) const { return QueuePri < O.QueuePri; }
   };
@@ -114,6 +116,7 @@ class BackgroundQueue {
 
 private:
   void notifyProgress() const; // Requires lock Mu
+  bool adjust(Task &T);
 
   std::mutex Mu;
   Stats Stat;
@@ -122,6 +125,7 @@ class BackgroundQueue {
   std::vector<Task> Queue; // max-heap
   llvm::StringMap<unsigned> Boosts;
   std::function<void(Stats)> OnProgress;
+  llvm::DenseSet<uint64_t> SeenKeys;
 };
 
 // Builds an in-memory index by by running the static indexer action over

diff  --git a/clang-tools-extra/clangd/index/BackgroundQueue.cpp b/clang-tools-extra/clangd/index/BackgroundQueue.cpp
index 3262a2f46d38..b0dc2acca356 100644
--- a/clang-tools-extra/clangd/index/BackgroundQueue.cpp
+++ b/clang-tools-extra/clangd/index/BackgroundQueue.cpp
@@ -72,10 +72,24 @@ void BackgroundQueue::stop() {
   CV.notify_all();
 }
 
+// Tweaks the priority of a newly-enqueued task, or returns false to cancel it.
+bool BackgroundQueue::adjust(Task &T) {
+  // It is tempting to drop duplicates of queued tasks, and merely deprioritize
+  // duplicates of completed tasks (i.e. reindexing on CDB changes). But:
+  //  - the background indexer doesn't support reindexing well, e.g. staleness
+  //    is checked at *enqueue* time only, and doesn't account for compile flags
+  //  - reindexing on compile flags is often a poor use of CPU in practice
+  if (T.Key && !SeenKeys.insert(T.Key).second)
+    return false;
+  T.QueuePri = std::max(T.QueuePri, Boosts.lookup(T.Tag));
+  return true;
+}
+
 void BackgroundQueue::push(Task T) {
   {
     std::lock_guard<std::mutex> Lock(Mu);
-    T.QueuePri = std::max(T.QueuePri, Boosts.lookup(T.Tag));
+    if (!adjust(T))
+      return;
     Queue.push_back(std::move(T));
     std::push_heap(Queue.begin(), Queue.end());
     ++Stat.Enqueued;
@@ -87,11 +101,13 @@ void BackgroundQueue::push(Task T) {
 void BackgroundQueue::append(std::vector<Task> Tasks) {
   {
     std::lock_guard<std::mutex> Lock(Mu);
-    for (Task &T : Tasks)
-      T.QueuePri = std::max(T.QueuePri, Boosts.lookup(T.Tag));
-    std::move(Tasks.begin(), Tasks.end(), std::back_inserter(Queue));
+    for (Task &T : Tasks) {
+      if (!adjust(T))
+        continue;
+      Queue.push_back(std::move(T));
+      ++Stat.Enqueued;
+    }
     std::make_heap(Queue.begin(), Queue.end());
-    Stat.Enqueued += Tasks.size();
     notifyProgress();
   }
   CV.notify_all();

diff  --git a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
index f4a9b2fa2d13..bcbedd2500ce 100644
--- a/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
+++ b/clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp
@@ -9,6 +9,7 @@
 #include "index/BackgroundRebuild.h"
 #include "clang/Tooling/ArgumentsAdjusters.h"
 #include "clang/Tooling/CompilationDatabase.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/Threading.h"
 #include "gmock/gmock.h"
@@ -666,25 +667,45 @@ TEST_F(BackgroundIndexTest, CmdLineHash) {
   // Make sure we only store the Cmd for main file.
   EXPECT_FALSE(MSS.loadShard(testPath("A.h"))->Cmd);
 
-  {
-    tooling::CompileCommand CmdStored = *MSS.loadShard(testPath("A.cc"))->Cmd;
-    EXPECT_EQ(CmdStored.CommandLine, Cmd.CommandLine);
-    EXPECT_EQ(CmdStored.Directory, Cmd.Directory);
-  }
+  tooling::CompileCommand CmdStored = *MSS.loadShard(testPath("A.cc"))->Cmd;
+  EXPECT_EQ(CmdStored.CommandLine, Cmd.CommandLine);
+  EXPECT_EQ(CmdStored.Directory, Cmd.Directory);
+}
 
-  // FIXME: Changing compile commands should be enough to invalidate the cache.
-  FS.Files[testPath("A.cc")] = " ";
-  Cmd.CommandLine = {"clang++", "../A.cc", "-Dfoo", "-fsyntax-only"};
-  CDB.setCompileCommand(testPath("build/../A.cc"), Cmd);
+TEST_F(BackgroundIndexTest, Reindex) {
+  MockFS FS;
+  llvm::StringMap<std::string> Storage;
+  size_t CacheHits = 0;
+  MemoryShardStorage MSS(Storage, CacheHits);
+  OverlayCDB CDB(/*Base=*/nullptr);
+  BackgroundIndex Idx(FS, CDB, [&](llvm::StringRef) { return &MSS; },
+                      /*Opts=*/{});
+
+  // Index a file.
+  FS.Files[testPath("A.cc")] = "int theOldFunction();";
+  tooling::CompileCommand Cmd;
+  Cmd.Filename = "../A.cc";
+  Cmd.Directory = testPath("build");
+  Cmd.CommandLine = {"clang++", "../A.cc", "-fsyntax-only"};
+  CDB.setCompileCommand(testPath("A.cc"), Cmd);
   ASSERT_TRUE(Idx.blockUntilIdleForTest());
 
-  EXPECT_FALSE(MSS.loadShard(testPath("A.h"))->Cmd);
+  // Verify the result is indexed and stored.
+  EXPECT_EQ(1u, runFuzzyFind(Idx, "theOldFunction").size());
+  EXPECT_EQ(0u, runFuzzyFind(Idx, "theNewFunction").size());
+  std::string OldShard = Storage.lookup(testPath("A.cc"));
+  EXPECT_NE("", OldShard);
 
-  {
-    tooling::CompileCommand CmdStored = *MSS.loadShard(testPath("A.cc"))->Cmd;
-    EXPECT_EQ(CmdStored.CommandLine, Cmd.CommandLine);
-    EXPECT_EQ(CmdStored.Directory, Cmd.Directory);
-  }
+  // Change the content and command, and notify to reindex it.
+  Cmd.CommandLine.push_back("-DFOO");
+  FS.Files[testPath("A.cc")] = "int theNewFunction();";
+  CDB.setCompileCommand(testPath("A.cc"), Cmd);
+  ASSERT_TRUE(Idx.blockUntilIdleForTest());
+
+  // Currently, we will never index the same main file again.
+  EXPECT_EQ(1u, runFuzzyFind(Idx, "theOldFunction").size());
+  EXPECT_EQ(0u, runFuzzyFind(Idx, "theNewFunction").size());
+  EXPECT_EQ(OldShard, Storage.lookup(testPath("A.cc")));
 }
 
 class BackgroundIndexRebuilderTest : public testing::Test {
@@ -829,6 +850,31 @@ TEST(BackgroundQueueTest, Boost) {
   }
 }
 
+TEST(BackgroundQueueTest, Duplicates) {
+  std::string Sequence;
+  BackgroundQueue::Task A([&] { Sequence.push_back('A'); });
+  A.QueuePri = 100;
+  A.Key = 1;
+  BackgroundQueue::Task B([&] { Sequence.push_back('B'); });
+  // B has no key, and is not subject to duplicate detection.
+  B.QueuePri = 50;
+
+  BackgroundQueue Q;
+  Q.append({A, B, A, B}); // One A is dropped, the other is high priority.
+  Q.work(/*OnIdle=*/[&] {
+    // The first time we go idle, we enqueue the same task again.
+    if (!llvm::is_contained(Sequence, ' ')) {
+      Sequence.push_back(' ');
+      Q.append({A, B, A, B}); // Both As are dropped.
+    } else {
+      Q.stop();
+    }
+  });
+
+  // This could reasonably be "ABB BBA", if we had good *re*indexing support.
+  EXPECT_EQ("ABB BB", Sequence);
+}
+
 TEST(BackgroundQueueTest, Progress) {
   using testing::AnyOf;
   BackgroundQueue::Stats S;


        


More information about the llvm-branch-commits mailing list