[clang-tools-extra] r320688 - [clangd] Symbol index interfaces and an in-memory index implementation.

Eric Liu via cfe-commits cfe-commits at lists.llvm.org
Thu Dec 14 03:25:50 PST 2017


Author: ioeric
Date: Thu Dec 14 03:25:49 2017
New Revision: 320688

URL: http://llvm.org/viewvc/llvm-project?rev=320688&view=rev
Log:
[clangd] Symbol index interfaces and an in-memory index implementation.

Summary:
o Index interfaces to support using different index sources (e.g. AST index, global index) for code completion, cross-reference finding etc. This patch focuses on code completion.

The following changes from the original patch have been split out:
o Implement an AST-based index.
o Add an option to replace sema code completion for qualified-id with index-based completion.
o Implement an initial naive code completion index which matches symbols that have the query string as substring.

Reviewers: malaperle, sammccall

Reviewed By: sammccall

Subscribers: hokein, klimek, malaperle, mgorny, ilya-biryukov, cfe-commits

Differential Revision: https://reviews.llvm.org/D40548

Added:
    clang-tools-extra/trunk/clangd/index/MemIndex.cpp
    clang-tools-extra/trunk/clangd/index/MemIndex.h
    clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp
Modified:
    clang-tools-extra/trunk/clangd/CMakeLists.txt
    clang-tools-extra/trunk/clangd/index/Index.h
    clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt

Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CMakeLists.txt?rev=320688&r1=320687&r2=320688&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/clangd/CMakeLists.txt Thu Dec 14 03:25:49 2017
@@ -19,6 +19,7 @@ add_clang_library(clangDaemon
   Protocol.cpp
   ProtocolHandlers.cpp
   Trace.cpp
+  index/MemIndex.cpp
   index/Index.cpp
   index/SymbolCollector.cpp
 

Modified: clang-tools-extra/trunk/clangd/index/Index.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/Index.h?rev=320688&r1=320687&r2=320688&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/Index.h (original)
+++ clang-tools-extra/trunk/clangd/index/Index.h Thu Dec 14 03:25:49 2017
@@ -10,6 +10,7 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
 
+#include "../Context.h"
 #include "clang/Index/IndexSymbol.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Hashing.h"
@@ -110,6 +111,34 @@ private:
   llvm::DenseMap<SymbolID, Symbol> Symbols;
 };
 
+struct FuzzyFindRequest {
+  /// \brief A query string for the fuzzy find. This is matched against symbols'
+  /// qualified names.
+  std::string Query;
+  /// \brief The maximum number of candidates to return.
+  size_t MaxCandidateCount = UINT_MAX;
+};
+
+/// \brief Interface for symbol indexes that can be used for searching or
+/// matching symbols among a set of symbols based on names or unique IDs.
+class SymbolIndex {
+public:
+  virtual ~SymbolIndex() = default;
+
+  /// \brief Matches symbols in the index fuzzily and applies \p Callback on
+  /// each matched symbol before returning.
+  ///
+  /// Returns true if the result list is complete, false if it was truncated
+  /// because more than Req.MaxCandidateCount symbols matched.
+  virtual bool
+  fuzzyFind(Context &Ctx, const FuzzyFindRequest &Req,
+            std::function<void(const Symbol &)> Callback) const = 0;
+
+  // FIXME: add interfaces for more index use cases:
+  //  - Symbol getSymbolInfo(SymbolID);
+  //  - getAllOccurrences(SymbolID);
+};
+
 } // namespace clangd
 } // namespace clang
 

Added: clang-tools-extra/trunk/clangd/index/MemIndex.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/MemIndex.cpp?rev=320688&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/MemIndex.cpp (added)
+++ clang-tools-extra/trunk/clangd/index/MemIndex.cpp Thu Dec 14 03:25:49 2017
@@ -0,0 +1,52 @@
+//===--- MemIndex.cpp - Dynamic in-memory symbol index. ----------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===-------------------------------------------------------------------===//
+
+#include "MemIndex.h"
+
+namespace clang {
+namespace clangd {
+
+void MemIndex::build(std::shared_ptr<std::vector<const Symbol *>> Syms) {
+  // \p Syms must be non-null. The ID -> symbol map is built before locking;
+  // a later symbol with an already-seen ID overwrites the earlier entry.
+  llvm::DenseMap<SymbolID, const Symbol *> TempIndex;
+  for (const Symbol *Sym : *Syms)
+    TempIndex[Sym->ID] = Sym;
+  {
+    std::lock_guard<std::mutex> Lock(Mutex);
+    Index.swap(TempIndex);
+    Symbols.swap(Syms);
+  } // The old index and symbols are released after Mutex is unlocked.
+}
+
+bool MemIndex::fuzzyFind(Context & /*Ctx*/, const FuzzyFindRequest &Req,
+                         std::function<void(const Symbol &)> Callback) const {
+  // Case-insensitive substring match of Req.Query against qualified names.
+  // Returns false as soon as a match beyond Req.MaxCandidateCount is found.
+  // NOTE: Callback runs with Mutex held, so it must not re-enter this index.
+  // FIXME: consider matching chunks in qualified names instead the whole
+  // string.
+  // FIXME: use better matching algorithm, e.g. fuzzy matcher.
+  const std::string LoweredQuery = llvm::StringRef(Req.Query).lower();
+  size_t Matched = 0;
+  std::lock_guard<std::mutex> Lock(Mutex);
+  for (const auto &Pair : Index) {
+    const Symbol *Sym = Pair.second;
+    const std::string LoweredName = llvm::StringRef(Sym->QualifiedName).lower();
+    if (!llvm::StringRef(LoweredName).contains(LoweredQuery))
+      continue;
+    if (++Matched > Req.MaxCandidateCount)
+      return false;
+    Callback(*Sym);
+  }
+  return true;
+}
+
+} // namespace clangd
+} // namespace clang

Added: clang-tools-extra/trunk/clangd/index/MemIndex.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/MemIndex.h?rev=320688&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/MemIndex.h (added)
+++ clang-tools-extra/trunk/clangd/index/MemIndex.h Thu Dec 14 03:25:49 2017
@@ -0,0 +1,41 @@
+//===--- MemIndex.h - Dynamic in-memory symbol index. ------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H
+
+#include "Index.h"
+#include <mutex>
+
+namespace clang {
+namespace clangd {
+
+/// \brief This implements an index for a (relatively small) set of symbols that
+/// can be easily managed in memory.
+class MemIndex : public SymbolIndex {
+public:
+  /// \brief (Re-)Build index for `Symbols`. All symbol pointers must remain
+  /// accessible as long as `Symbols` is kept alive.
+  void build(std::shared_ptr<std::vector<const Symbol *>> Symbols);
+
+  bool fuzzyFind(Context &Ctx, const FuzzyFindRequest &Req,
+                 std::function<void(const Symbol &)> Callback) const override;
+
+private:
+  std::shared_ptr<std::vector<const Symbol *>> Symbols;
+  // Index maps each symbol ID to one symbol, i.e. deduplicates by symbol ID.
+  // FIXME: build smarter index structure.
+  llvm::DenseMap<SymbolID, const Symbol *> Index;
+  mutable std::mutex Mutex; // Guards Symbols and Index.
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_MEMINDEX_H

Modified: clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt?rev=320688&r1=320687&r2=320688&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt Thu Dec 14 03:25:49 2017
@@ -13,6 +13,7 @@ add_extra_unittest(ClangdTests
   CodeCompleteTests.cpp
   ContextTests.cpp
   FuzzyMatchTests.cpp
+  IndexTests.cpp
   JSONExprTests.cpp
   TestFS.cpp
   TraceTests.cpp

Added: clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp?rev=320688&view=auto
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp (added)
+++ clang-tools-extra/trunk/unittests/clangd/IndexTests.cpp Thu Dec 14 03:25:49 2017
@@ -0,0 +1,114 @@
+//===-- IndexTests.cpp  -------------------------------*- C++ -*-----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "index/Index.h"
+#include "index/MemIndex.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::UnorderedElementsAre;
+
+namespace clang {
+namespace clangd {
+
+namespace {
+
+Symbol symbol(llvm::StringRef ID) { // \p ID yields both SymbolID and name.
+  Symbol Result;
+  Result.QualifiedName = ID;
+  Result.ID = SymbolID(ID);
+  return Result;
+}
+
+struct SlabAndPointers {
+  SymbolSlab Slab; // Holds the generated symbols.
+  std::vector<const Symbol *> Pointers; // Symbol pointers given to the index.
+};
+
+// Create a slab of symbols with IDs and names in [Begin, End]. The lifetime of
+// the slab is managed by the returned shared pointer. If \p WeakSymbols is
+// provided, it is set to observe the managed object behind the returned shared
+// pointer.
+std::shared_ptr<std::vector<const Symbol *>>
+generateNumSymbols(int Begin, int End,
+                   std::weak_ptr<SlabAndPointers> *WeakSymbols = nullptr) {
+  auto Slab = std::make_shared<SlabAndPointers>();
+  if (WeakSymbols)
+    *WeakSymbols = Slab;
+
+  for (int i = Begin; i <= End; i++)
+    Slab->Slab.insert(symbol(std::to_string(i)));
+  for (const auto &Sym : Slab->Slab)
+    Slab->Pointers.push_back(&Sym.second);
+  // Take the address first so Slab is never read after std::move.
+  auto *Pointers = &Slab->Pointers;
+  return {std::move(Slab), Pointers};
+}
+
+std::vector<std::string> match(const SymbolIndex &I,
+                               const FuzzyFindRequest &Req) {
+  std::vector<std::string> Names; // Qualified names of the reported symbols.
+  auto Add = [&Names](const Symbol &S) { Names.push_back(S.QualifiedName); };
+  auto Ctx = Context::empty();
+  I.fuzzyFind(Ctx, Req, Add);
+  return Names;
+}
+
+TEST(MemIndexTest, MemIndexSymbolsRecycled) {
+  MemIndex I;
+  std::weak_ptr<SlabAndPointers> Symbols;
+  I.build(generateNumSymbols(0, 10, &Symbols));
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("7"));
+  // The index still holds the first slab, so it must not have expired yet.
+  EXPECT_FALSE(Symbols.expired());
+  // Rebuilding replaces the index's symbols, which releases the old slab.
+  I.build(generateNumSymbols(0, 0));
+  EXPECT_TRUE(Symbols.expired());
+}
+
+TEST(MemIndexTest, MemIndexMatchSubstring) { // "5" is in 5, 15 and 25.
+  FuzzyFindRequest Req;
+  Req.Query = "5";
+  MemIndex I;
+  I.build(generateNumSymbols(5, 25));
+  EXPECT_THAT(match(I, Req), UnorderedElementsAre("5", "15", "25"));
+}
+
+TEST(MemIndexTest, MemIndexDeduplicate) {
+  auto Symbols = generateNumSymbols(0, 10);
+
+  // Inject duplicates of "7" (already generated above, same ID) and make sure
+  // the symbol is matched only once.
+  auto Sym = symbol("7");
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+  Symbols->push_back(&Sym);
+  // Sym is declared before I, so it outlives the index that points at it.
+  FuzzyFindRequest Req;
+  Req.Query = "7";
+  MemIndex I;
+  I.build(std::move(Symbols));
+  auto Matches = match(I, Req);
+  EXPECT_EQ(Matches.size(), 1u);
+}
+
+TEST(MemIndexTest, MemIndexLimitedNumMatches) { // Results are capped at 3.
+  FuzzyFindRequest Req;
+  Req.MaxCandidateCount = 3;
+  Req.Query = "5";
+  MemIndex I;
+  I.build(generateNumSymbols(0, 100));
+  const auto Matches = match(I, Req);
+  EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang




More information about the cfe-commits mailing list