[clang-tools-extra] r341542 - [clangd] Implement proximity path boosting for Dex

Kirill Bobyrev via cfe-commits cfe-commits at lists.llvm.org
Thu Sep 6 05:54:43 PDT 2018


Author: omtcyfz
Date: Thu Sep  6 05:54:43 2018
New Revision: 341542

URL: http://llvm.org/viewvc/llvm-project?rev=341542&view=rev
Log:
[clangd] Implement proximity path boosting for Dex

This patch introduces `PathURI` Search Token kind and utilizes it to
uprank symbols which are defined in files with small distance to the
directory where the fuzzy find request is coming from (e.g. files user
is editing).

Reviewed By: ioeric

Reviewers: ioeric, sammccall

Differential Revision: https://reviews.llvm.org/D51481

Modified:
    clang-tools-extra/trunk/clangd/FileDistance.h
    clang-tools-extra/trunk/clangd/URI.cpp
    clang-tools-extra/trunk/clangd/URI.h
    clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp
    clang-tools-extra/trunk/clangd/index/SymbolYAML.h
    clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp
    clang-tools-extra/trunk/clangd/index/dex/DexIndex.h
    clang-tools-extra/trunk/clangd/index/dex/Token.h
    clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp
    clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp

Modified: clang-tools-extra/trunk/clangd/FileDistance.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/FileDistance.h?rev=341542&r1=341541&r2=341542&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/FileDistance.h (original)
+++ clang-tools-extra/trunk/clangd/FileDistance.h Thu Sep  6 05:54:43 2018
@@ -37,6 +37,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H
+
 #include "URI.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMapInfo.h"
@@ -107,3 +110,5 @@ private:
 
 } // namespace clangd
 } // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H

Modified: clang-tools-extra/trunk/clangd/URI.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/URI.cpp?rev=341542&r1=341541&r2=341542&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/URI.cpp (original)
+++ clang-tools-extra/trunk/clangd/URI.cpp Thu Sep  6 05:54:43 2018
@@ -181,6 +181,26 @@ llvm::Expected<URI> URI::create(llvm::St
   return S->get()->uriFromAbsolutePath(AbsolutePath);
 }
 
+llvm::Expected<URI> URI::create(llvm::StringRef AbsolutePath,
+                                const std::vector<std::string> &Schemes) {
+  if (!llvm::sys::path::is_absolute(AbsolutePath))
+    return make_string_error("Not a valid absolute path: " + AbsolutePath);
+  for (const auto &Scheme : Schemes) {
+    auto URI = URI::create(AbsolutePath, Scheme);
+    // For some paths, conversion to different URI schemes is impossible. These
+    // should be just skipped.
+    if (!URI) {
+      // Ignore the error.
+      llvm::consumeError(URI.takeError());
+      continue;
+    }
+    return URI;
+  }
+  return make_string_error(
+      "Couldn't convert " + AbsolutePath +
+      " to any given scheme: " + llvm::join(Schemes, ", "));
+}
+
 URI URI::createFile(llvm::StringRef AbsolutePath) {
   auto U = create(AbsolutePath, "file");
   if (!U)

Modified: clang-tools-extra/trunk/clangd/URI.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/URI.h?rev=341542&r1=341541&r2=341542&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/URI.h (original)
+++ clang-tools-extra/trunk/clangd/URI.h Thu Sep  6 05:54:43 2018
@@ -45,6 +45,11 @@ public:
   static llvm::Expected<URI> create(llvm::StringRef AbsolutePath,
                                     llvm::StringRef Scheme);
 
+  // Similar to above except this uses the first scheme in \p Schemes that
+  // works.
+  static llvm::Expected<URI> create(llvm::StringRef AbsolutePath,
+                                    const std::vector<std::string> &Schemes);
+
   /// This creates a file:// URI for \p AbsolutePath. The path must be absolute.
   static URI createFile(llvm::StringRef AbsolutePath);
 

Modified: clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp?rev=341542&r1=341541&r2=341542&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp Thu Sep  6 05:54:43 2018
@@ -185,6 +185,7 @@ std::string SymbolToYAML(Symbol Sym) {
 }
 
 std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
+                                       llvm::ArrayRef<std::string> URISchemes,
                                        bool UseDex) {
   trace::Span OverallTracer("LoadIndex");
   auto Buffer = llvm::MemoryBuffer::getFile(SymbolFilename);
@@ -209,7 +210,7 @@ std::unique_ptr<SymbolIndex> loadIndex(l
   if (!Slab)
     return nullptr;
   trace::Span Tracer("BuildIndex");
-  return UseDex ? dex::DexIndex::build(std::move(*Slab))
+  return UseDex ? dex::DexIndex::build(std::move(*Slab), URISchemes)
                 : MemIndex::build(std::move(*Slab), RefSlab());
 }
 

Modified: clang-tools-extra/trunk/clangd/index/SymbolYAML.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolYAML.h?rev=341542&r1=341541&r2=341542&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/SymbolYAML.h (original)
+++ clang-tools-extra/trunk/clangd/index/SymbolYAML.h Thu Sep  6 05:54:43 2018
@@ -45,6 +45,7 @@ void SymbolsToYAML(const SymbolSlab &Sym
 // The size of global symbols should be relatively small, so that all symbols
 // can be managed in memory.
 std::unique_ptr<SymbolIndex> loadIndex(llvm::StringRef SymbolFilename,
+                                       llvm::ArrayRef<std::string> URISchemes,
                                        bool UseDex = true);
 
 } // namespace clangd

Modified: clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp?rev=341542&r1=341541&r2=341542&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp Thu Sep  6 05:54:43 2018
@@ -8,8 +8,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "DexIndex.h"
+#include "../../FileDistance.h"
 #include "../../FuzzyMatch.h"
 #include "../../Logger.h"
+#include "../../Quality.h"
+#include "llvm/ADT/StringSet.h"
 #include <algorithm>
 #include <queue>
 
@@ -26,28 +29,87 @@ namespace {
 // Returns the tokens which are given symbols's characteristics. For example,
 // trigrams and scopes.
 // FIXME(kbobyrev): Support more token types:
-// * Path proximity
 // * Types
+// * Namespace proximity
 std::vector<Token> generateSearchTokens(const Symbol &Sym) {
   std::vector<Token> Result = generateIdentifierTrigrams(Sym.Name);
   Result.emplace_back(Token::Kind::Scope, Sym.Scope);
+  // Skip token generation for symbols with unknown declaration location.
+  if (!Sym.CanonicalDeclaration.FileURI.empty())
+    for (const auto &ProximityURI :
+         generateProximityURIs(Sym.CanonicalDeclaration.FileURI))
+      Result.emplace_back(Token::Kind::ProximityURI, ProximityURI);
   return Result;
 }
 
+// Constructs BOOST iterators for Path Proximities.
+std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(
+    llvm::ArrayRef<std::string> ProximityPaths,
+    llvm::ArrayRef<std::string> URISchemes,
+    const llvm::DenseMap<Token, PostingList> &InvertedIndex) {
+  std::vector<std::unique_ptr<Iterator>> BoostingIterators;
+  // Deduplicate parent URIs extracted from the ProximityPaths.
+  llvm::StringSet<> ParentURIs;
+  llvm::StringMap<SourceParams> Sources;
+  for (const auto &Path : ProximityPaths) {
+    Sources[Path] = SourceParams();
+    auto PathURI = URI::create(Path, URISchemes);
+    if (!PathURI) {
+      elog("Given ProximityPath {0} is can not be converted to any known URI "
+           "scheme. fuzzyFind request will ignore it.",
+           Path);
+      llvm::consumeError(PathURI.takeError());
+    }
+    const auto PathProximityURIs = generateProximityURIs(PathURI->toString());
+    for (const auto &ProximityURI : PathProximityURIs)
+      ParentURIs.insert(ProximityURI);
+  }
+  // Use SymbolRelevanceSignals for symbol relevance evaluation: use defaults
+  // for all parameters except for Proximity Path distance signal.
+  SymbolRelevanceSignals PathProximitySignals;
+  // DistanceCalculator will find the shortest distance from ProximityPaths to
+  // any URI extracted from the ProximityPaths.
+  URIDistance DistanceCalculator(Sources);
+  PathProximitySignals.FileProximityMatch = &DistanceCalculator;
+  // Try to build BOOST iterator for each Proximity Path provided by
+  // ProximityPaths. Boosting factor should depend on the distance to the
+  // Proximity Path: the closer processed path is, the higher boosting factor.
+  for (const auto &ParentURI : ParentURIs.keys()) {
+    const auto It =
+        InvertedIndex.find(Token(Token::Kind::ProximityURI, ParentURI));
+    if (It != InvertedIndex.end()) {
+      // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator.
+      PathProximitySignals.SymbolURI = ParentURI;
+      BoostingIterators.push_back(
+          createBoost(create(It->second), PathProximitySignals.evaluate()));
+    }
+  }
+  return BoostingIterators;
+}
+
 } // namespace
 
 void DexIndex::buildIndex() {
-  for (const Symbol *Sym : Symbols) {
+  std::vector<std::pair<float, const Symbol *>> ScoredSymbols(Symbols.size());
+
+  for (size_t I = 0; I < Symbols.size(); ++I) {
+    const Symbol *Sym = Symbols[I];
     LookupTable[Sym->ID] = Sym;
-    SymbolQuality[Sym] = quality(*Sym);
+    ScoredSymbols[I] = {quality(*Sym), Sym};
   }
 
   // Symbols are sorted by symbol qualities so that items in the posting lists
   // are stored in the descending order of symbol quality.
-  std::sort(begin(Symbols), end(Symbols),
-            [&](const Symbol *LHS, const Symbol *RHS) {
-              return SymbolQuality[LHS] > SymbolQuality[RHS];
-            });
+  std::sort(begin(ScoredSymbols), end(ScoredSymbols),
+            std::greater<std::pair<float, const Symbol *>>());
+
+  // SymbolQuality was empty up until now.
+  SymbolQuality.resize(Symbols.size());
+  // Populate internal storage using Symbol + Score pairs.
+  for (size_t I = 0; I < ScoredSymbols.size(); ++I) {
+    SymbolQuality[I] = ScoredSymbols[I].first;
+    Symbols[I] = ScoredSymbols[I].second;
+  }
 
   // Populate TempInvertedIndex with posting lists for index symbols.
   for (DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank) {
@@ -96,6 +158,17 @@ bool DexIndex::fuzzyFind(
   if (!ScopeIterators.empty())
     TopLevelChildren.push_back(createOr(move(ScopeIterators)));
 
+  // Add proximity paths boosting.
+  auto BoostingIterators = createFileProximityIterators(
+      Req.ProximityPaths, URISchemes, InvertedIndex);
+  // Boosting iterators do not actually filter symbols. In order to preserve
+  // the validity of resulting query, TRUE iterator should be added along
+  // BOOSTs.
+  if (!BoostingIterators.empty()) {
+    BoostingIterators.push_back(createTrue(Symbols.size()));
+    TopLevelChildren.push_back(createOr(move(BoostingIterators)));
+  }
+
   // Use TRUE iterator if both trigrams and scopes from the query are not
   // present in the symbol index.
   auto QueryIterator = TopLevelChildren.empty()
@@ -106,32 +179,36 @@ bool DexIndex::fuzzyFind(
   // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as
   // using 100x of the requested number might not be good in practice, e.g.
   // when the requested number of items is small.
-  const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount;
+  const size_t ItemsToRetrieve = 100 * Req.MaxCandidateCount;
   auto Root = createLimit(move(QueryIterator), ItemsToRetrieve);
-  // FIXME(kbobyrev): Add boosting to the query and utilize retrieved
-  // boosting scores.
-  std::vector<std::pair<DocID, float>> SymbolDocIDs = consume(*Root);
-
-  // Retrieve top Req.MaxCandidateCount items.
-  std::priority_queue<std::pair<float, const Symbol *>> Top;
-  for (const auto &P : SymbolDocIDs) {
-    const DocID SymbolDocID = P.first;
+
+  using IDAndScore = std::pair<DocID, float>;
+  std::vector<IDAndScore> IDAndScores = consume(*Root);
+
+  auto Compare = [](const IDAndScore &LHS, const IDAndScore &RHS) {
+    return LHS.second > RHS.second;
+  };
+  TopN<IDAndScore, decltype(Compare)> Top(Req.MaxCandidateCount, Compare);
+  for (const auto &IDAndScore : IDAndScores) {
+    const DocID SymbolDocID = IDAndScore.first;
     const auto *Sym = Symbols[SymbolDocID];
     const llvm::Optional<float> Score = Filter.match(Sym->Name);
     if (!Score)
       continue;
-    // Multiply score by a negative factor so that Top stores items with the
-    // highest actual score.
-    Top.emplace(-(*Score) * SymbolQuality.find(Sym)->second, Sym);
-    if (Top.size() > Req.MaxCandidateCount) {
+    // Combine Fuzzy Matching score, precomputed symbol quality and boosting
+    // score for a cumulative final symbol score.
+    const float FinalScore =
+        (*Score) * SymbolQuality[SymbolDocID] * IDAndScore.second;
+    // If Top.push(...) returns true, it means that it had to pop an item. In
+    // this case, it is possible to retrieve more symbols.
+    if (Top.push({SymbolDocID, FinalScore}))
       More = true;
-      Top.pop();
-    }
   }
 
-  // Apply callback to the top Req.MaxCandidateCount items.
-  for (; !Top.empty(); Top.pop())
-    Callback(*Top.top().second);
+  // Apply callback to the top Req.MaxCandidateCount items in the descending
+  // order of cumulative score.
+  for (const auto &Item : std::move(Top).items())
+    Callback(*Symbols[Item.first]);
   return More;
 }
 
@@ -161,6 +238,33 @@ size_t DexIndex::estimateMemoryUsage() c
   return Bytes;
 }
 
+std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath) {
+  std::vector<std::string> Result;
+  auto ParsedURI = URI::parse(URIPath);
+  assert(ParsedURI &&
+         "Non-empty argument of generateProximityURIs() should be a valid "
+         "URI.");
+  StringRef Body = ParsedURI->body();
+  // FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum
+  // size of resulting vector. Some projects might want to have higher limit if
+  // the file hierarchy is deeper. For the generic case, it would be useful to
+  // calculate Limit in the index build stage by calculating the maximum depth
+  // of the project source tree at runtime.
+  size_t Limit = 5;
+  // Insert original URI before the loop: this would save a redundant iteration
+  // with a URI parse.
+  Result.emplace_back(ParsedURI->toString());
+  while (!Body.empty() && --Limit > 0) {
+    // FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and
+    // could be optimized.
+    Body = llvm::sys::path::parent_path(Body, llvm::sys::path::Style::posix);
+    URI TokenURI(ParsedURI->scheme(), ParsedURI->authority(), Body);
+    if (!Body.empty())
+      Result.emplace_back(TokenURI.toString());
+  }
+  return Result;
+}
+
 } // namespace dex
 } // namespace clangd
 } // namespace clang

Modified: clang-tools-extra/trunk/clangd/index/dex/DexIndex.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/DexIndex.h?rev=341542&r1=341541&r2=341542&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/DexIndex.h (original)
+++ clang-tools-extra/trunk/clangd/index/dex/DexIndex.h Thu Sep  6 05:54:43 2018
@@ -39,22 +39,26 @@ namespace dex {
 class DexIndex : public SymbolIndex {
 public:
   // All symbols must outlive this index.
-  template <typename Range> DexIndex(Range &&Symbols) {
+  template <typename Range>
+  DexIndex(Range &&Symbols, llvm::ArrayRef<std::string> URISchemes)
+      : URISchemes(URISchemes) {
     for (auto &&Sym : Symbols)
       this->Symbols.push_back(&Sym);
     buildIndex();
   }
   // Symbols are owned by BackingData, Index takes ownership.
   template <typename Range, typename Payload>
-  DexIndex(Range &&Symbols, Payload &&BackingData)
-      : DexIndex(std::forward<Range>(Symbols)) {
+  DexIndex(Range &&Symbols, Payload &&BackingData,
+           llvm::ArrayRef<std::string> URISchemes)
+      : DexIndex(std::forward<Range>(Symbols), URISchemes) {
     KeepAlive = std::shared_ptr<void>(
         std::make_shared<Payload>(std::move(BackingData)), nullptr);
   }
 
   /// Builds an index from a slab. The index takes ownership of the slab.
-  static std::unique_ptr<SymbolIndex> build(SymbolSlab Slab) {
-    return llvm::make_unique<DexIndex>(Slab, std::move(Slab));
+  static std::unique_ptr<SymbolIndex>
+  build(SymbolSlab Slab, llvm::ArrayRef<std::string> URISchemes) {
+    return llvm::make_unique<DexIndex>(Slab, std::move(Slab), URISchemes);
   }
 
   bool
@@ -72,21 +76,31 @@ public:
 private:
   void buildIndex();
 
+  /// Stores symbols sorted in the descending order of symbol quality..
   std::vector<const Symbol *> Symbols;
+  /// SymbolQuality[I] is the quality of Symbols[I].
+  std::vector<float> SymbolQuality;
   llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
-  llvm::DenseMap<const Symbol *, float> SymbolQuality;
-  // Inverted index is a mapping from the search token to the posting list,
-  // which contains all items which can be characterized by such search token.
-  // For example, if the search token is scope "std::", the corresponding
-  // posting list would contain all indices of symbols defined in namespace std.
-  // Inverted index is used to retrieve posting lists which are processed during
-  // the fuzzyFind process.
+  /// Inverted index is a mapping from the search token to the posting list,
+  /// which contains all items which can be characterized by such search token.
+  /// For example, if the search token is scope "std::", the corresponding
+  /// posting list would contain all indices of symbols defined in namespace
+  /// std. Inverted index is used to retrieve posting lists which are processed
+  /// during the fuzzyFind process.
   llvm::DenseMap<Token, PostingList> InvertedIndex;
   std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
+
+  const std::vector<std::string> URISchemes;
 };
 
+/// Returns Search Token for a number of parent directories of given Path.
+/// Should be used within the index build process.
+///
+/// This function is exposed for testing only.
+std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
+
 } // namespace dex
 } // namespace clangd
 } // namespace clang
 
-#endif
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H

Modified: clang-tools-extra/trunk/clangd/index/dex/Token.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/Token.h?rev=341542&r1=341541&r2=341542&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/Token.h (original)
+++ clang-tools-extra/trunk/clangd/index/dex/Token.h Thu Sep  6 05:54:43 2018
@@ -41,6 +41,10 @@ struct Token {
   /// Kind specifies Token type which defines semantics for the internal
   /// representation. Each Kind has different representation stored in Data
   /// field.
+  // FIXME(kbobyrev): Storing Data hash would be more efficient than storing raw
+  // strings. For example, PathURI store URIs of each directory and its parents,
+  // which induces a lot of overhead because these paths tend to be long and
+  // each parent directory is a prefix.
   enum class Kind {
     /// Represents trigram used for fuzzy search of unqualified symbol names.
     ///
@@ -48,15 +52,20 @@ struct Token {
     Trigram,
     /// Scope primitives, e.g. "symbol belongs to namespace foo::bar".
     ///
-    /// Data stroes full scope name , e.g. "foo::bar::baz::" or "" (for global
+    /// Data stroes full scope name, e.g. "foo::bar::baz::" or "" (for global
     /// scope).
     Scope,
+    /// Path Proximity URI to symbol declaration.
+    ///
+    /// Data stores path URI of symbol declaration file or its parent.
+    ///
+    /// Example: "file:///path/to/clang-tools-extra/clangd/index/SymbolIndex.h"
+    /// and some amount of its parents.
+    ProximityURI,
     /// Internal Token type for invalid/special tokens, e.g. empty tokens for
     /// llvm::DenseMap.
     Sentinel,
     /// FIXME(kbobyrev): Add other Token Kinds
-    /// * Path with full or relative path to the directory in which symbol is
-    ///   defined
     /// * Type with qualified type name or its USR
   };
 

Modified: clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp?rev=341542&r1=341541&r2=341542&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp (original)
+++ clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp Thu Sep  6 05:54:43 2018
@@ -275,8 +275,8 @@ int main(int argc, char *argv[]) {
     // Load the index asynchronously. Meanwhile SwapIndex returns no results.
     SwapIndex *Placeholder;
     StaticIdx.reset(Placeholder = new SwapIndex(llvm::make_unique<MemIndex>()));
-    runAsync<void>([Placeholder] {
-      if (auto Idx = loadIndex(YamlSymbolFile))
+    runAsync<void>([Placeholder, &Opts] {
+      if (auto Idx = loadIndex(YamlSymbolFile, Opts.URISchemes, UseDex))
         Placeholder->reset(std::move(Idx));
     });
   }

Modified: clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp?rev=341542&r1=341541&r2=341542&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp Thu Sep  6 05:54:43 2018
@@ -7,6 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "FuzzyMatch.h"
+#include "TestFS.h"
 #include "TestIndex.h"
 #include "index/Index.h"
 #include "index/Merge.h"
@@ -30,6 +32,12 @@ namespace clangd {
 namespace dex {
 namespace {
 
+std::vector<std::string> URISchemes = {"unittest"};
+
+//===----------------------------------------------------------------------===//
+// Query iterator tests.
+//===----------------------------------------------------------------------===//
+
 std::vector<DocID> consumeIDs(Iterator &It) {
   auto IDAndScore = consume(It);
   std::vector<DocID> IDs(IDAndScore.size());
@@ -325,15 +333,24 @@ TEST(DexIndexIterators, Boost) {
   EXPECT_THAT(ElementBoost, 3);
 }
 
+//===----------------------------------------------------------------------===//
+// Search token tests.
+//===----------------------------------------------------------------------===//
+
 testing::Matcher<std::vector<Token>>
-trigramsAre(std::initializer_list<std::string> Trigrams) {
+tokensAre(std::initializer_list<std::string> Strings, Token::Kind Kind) {
   std::vector<Token> Tokens;
-  for (const auto &Symbols : Trigrams) {
-    Tokens.push_back(Token(Token::Kind::Trigram, Symbols));
+  for (const auto &TokenData : Strings) {
+    Tokens.push_back(Token(Kind, TokenData));
   }
   return testing::UnorderedElementsAreArray(Tokens);
 }
 
+testing::Matcher<std::vector<Token>>
+trigramsAre(std::initializer_list<std::string> Trigrams) {
+  return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
 TEST(DexIndexTrigrams, IdentifierTrigrams) {
   EXPECT_THAT(generateIdentifierTrigrams("X86"),
               trigramsAre({"x86", "x$$", "x8$"}));
@@ -408,8 +425,25 @@ TEST(DexIndexTrigrams, QueryTrigrams) {
                            "hij", "ijk", "jkl", "klm"}));
 }
 
+TEST(DexSearchTokens, SymbolPath) {
+  EXPECT_THAT(generateProximityURIs(
+                  "unittest:///clang-tools-extra/clangd/index/Token.h"),
+              ElementsAre("unittest:///clang-tools-extra/clangd/index/Token.h",
+                          "unittest:///clang-tools-extra/clangd/index",
+                          "unittest:///clang-tools-extra/clangd",
+                          "unittest:///clang-tools-extra", "unittest:///"));
+
+  EXPECT_THAT(generateProximityURIs("unittest:///a/b/c.h"),
+              ElementsAre("unittest:///a/b/c.h", "unittest:///a/b",
+                          "unittest:///a", "unittest:///"));
+}
+
+//===----------------------------------------------------------------------===//
+// Index tests.
+//===----------------------------------------------------------------------===//
+
 TEST(DexIndex, Lookup) {
-  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}));
+  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
   EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
               UnorderedElementsAre("ns::abc", "ns::xyz"));
@@ -421,7 +455,8 @@ TEST(DexIndex, Lookup) {
 TEST(DexIndex, FuzzyFind) {
   auto Index = DexIndex::build(
       generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-                       "other::ABC", "other::A"}));
+                       "other::ABC", "other::A"}),
+      URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "ABC";
   Req.Scopes = {"ns::"};
@@ -443,7 +478,8 @@ TEST(DexIndex, FuzzyFind) {
 
 TEST(DexIndexTest, FuzzyMatchQ) {
   auto I = DexIndex::build(
-      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
+      URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "lol";
   Req.MaxCandidateCount = 2;
@@ -461,12 +497,12 @@ TEST(DexIndexTest, DexIndexDeduplicate)
                                  symbol("2") /* duplicate */};
   FuzzyFindRequest Req;
   Req.Query = "2";
-  DexIndex I(Symbols);
+  DexIndex I(Symbols, URISchemes);
   EXPECT_THAT(match(I, Req), ElementsAre("2", "2"));
 }
 
 TEST(DexIndexTest, DexIndexLimitedNumMatches) {
-  auto I = DexIndex::build(generateNumSymbols(0, 100));
+  auto I = DexIndex::build(generateNumSymbols(0, 100), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "5";
   Req.MaxCandidateCount = 3;
@@ -478,7 +514,8 @@ TEST(DexIndexTest, DexIndexLimitedNumMat
 
 TEST(DexIndexTest, FuzzyMatch) {
   auto I = DexIndex::build(
-      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}));
+      generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
+      URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "lol";
   Req.MaxCandidateCount = 2;
@@ -487,14 +524,16 @@ TEST(DexIndexTest, FuzzyMatch) {
 }
 
 TEST(DexIndexTest, MatchQualifiedNamesWithoutSpecificScope) {
-  auto I = DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}));
+  auto I =
+      DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
 }
 
 TEST(DexIndexTest, MatchQualifiedNamesWithGlobalScope) {
-  auto I = DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}));
+  auto I =
+      DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {""};
@@ -503,7 +542,7 @@ TEST(DexIndexTest, MatchQualifiedNamesWi
 
 TEST(DexIndexTest, MatchQualifiedNamesWithOneScope) {
   auto I = DexIndex::build(
-      generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}));
+      generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {"a::"};
@@ -512,7 +551,7 @@ TEST(DexIndexTest, MatchQualifiedNamesWi
 
 TEST(DexIndexTest, MatchQualifiedNamesWithMultipleScopes) {
   auto I = DexIndex::build(
-      generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}));
+      generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {"a::", "b::"};
@@ -520,7 +559,7 @@ TEST(DexIndexTest, MatchQualifiedNamesWi
 }
 
 TEST(DexIndexTest, NoMatchNestedScopes) {
-  auto I = DexIndex::build(generateSymbols({"a::y1", "a::b::y2"}));
+  auto I = DexIndex::build(generateSymbols({"a::y1", "a::b::y2"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {"a::"};
@@ -528,7 +567,7 @@ TEST(DexIndexTest, NoMatchNestedScopes)
 }
 
 TEST(DexIndexTest, IgnoreCases) {
-  auto I = DexIndex::build(generateSymbols({"ns::ABC", "ns::abc"}));
+  auto I = DexIndex::build(generateSymbols({"ns::ABC", "ns::abc"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "AB";
   Req.Scopes = {"ns::"};
@@ -536,7 +575,7 @@ TEST(DexIndexTest, IgnoreCases) {
 }
 
 TEST(DexIndexTest, Lookup) {
-  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}));
+  auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
   EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
   EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
               UnorderedElementsAre("ns::abc", "ns::xyz"));
@@ -545,6 +584,31 @@ TEST(DexIndexTest, Lookup) {
   EXPECT_THAT(lookup(*I, SymbolID("ns::nonono")), UnorderedElementsAre());
 }
 
+TEST(DexIndexTest, ProximityPathsBoosting) {
+  auto RootSymbol = symbol("root::abc");
+  RootSymbol.CanonicalDeclaration.FileURI = "unittest:///file.h";
+  auto CloseSymbol = symbol("close::abc");
+  CloseSymbol.CanonicalDeclaration.FileURI = "unittest:///a/b/c/d/e/f/file.h";
+
+  std::vector<Symbol> Symbols{CloseSymbol, RootSymbol};
+  DexIndex I(Symbols, URISchemes);
+
+  FuzzyFindRequest Req;
+  Req.Query = "abc";
+  // The best candidate can change depending on the proximity paths.
+  Req.MaxCandidateCount = 1;
+
+  // FuzzyFind request comes from the file which is far from the root: expect
+  // CloseSymbol to come out.
+  Req.ProximityPaths = {testPath("a/b/c/d/e/f/file.h")};
+  EXPECT_THAT(match(I, Req), ElementsAre("close::abc"));
+
+  // FuzzyFind request comes from the file which is close to the root: expect
+  // RootSymbol to come out.
+  Req.ProximityPaths = {testPath("file.h")};
+  EXPECT_THAT(match(I, Req), ElementsAre("root::abc"));
+}
+
 } // namespace
 } // namespace dex
 } // namespace clangd




More information about the cfe-commits mailing list