[clang-tools-extra] r343802 - [clangd] Simplify Dex query tree logic and fix missing-posting-list bug

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 4 10:18:55 PDT 2018


Author: sammccall
Date: Thu Oct  4 10:18:55 2018
New Revision: 343802

URL: http://llvm.org/viewvc/llvm-project?rev=343802&view=rev
Log:
[clangd] Simplify Dex query tree logic and fix missing-posting-list bug

Summary:
The bug being fixed: when a posting list doesn't exist in the index, it
was previously just dropped from the query rather than being treated as
empty. Now that we have the FALSE iterator, we can use it instead.

The query tree logic previously had a bunch of special cases to detect whether
subtrees are empty. Now we just naively build the whole tree, and rely
on the query optimizations to drop the trivial parts.

Finally, there was a bug in trigram generation: the empty query would
generate a single trigram "$$$" instead of no trigrams.
This had no effect (there was no posting list, so the other bug
cancelled it out). But we now have to fix this bug too.

Reviewers: ilya-biryukov

Subscribers: ioeric, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Differential Revision: https://reviews.llvm.org/D52796

Modified:
    clang-tools-extra/trunk/clangd/index/dex/Dex.cpp
    clang-tools-extra/trunk/clangd/index/dex/Dex.h
    clang-tools-extra/trunk/clangd/index/dex/Iterator.h
    clang-tools-extra/trunk/clangd/index/dex/Trigram.cpp
    clang-tools-extra/trunk/unittests/clangd/DexTests.cpp

Modified: clang-tools-extra/trunk/clangd/index/dex/Dex.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/Dex.cpp?rev=343802&r1=343801&r2=343802&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/Dex.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/dex/Dex.cpp Thu Oct  4 10:18:55 2018
@@ -62,7 +62,7 @@ std::vector<Token> generateSearchTokens(
 }
 
 // Constructs BOOST iterators for Path Proximities.
-std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(
+std::unique_ptr<Iterator> createFileProximityIterator(
     llvm::ArrayRef<std::string> ProximityPaths,
     llvm::ArrayRef<std::string> URISchemes,
     const llvm::DenseMap<Token, PostingList> &InvertedIndex,
@@ -105,7 +105,8 @@ std::vector<std::unique_ptr<Iterator>> c
           It->second.iterator(&It->first), PathProximitySignals.evaluate()));
     }
   }
-  return BoostingIterators;
+  BoostingIterators.push_back(Corpus.all());
+  return Corpus.unionOf(std::move(BoostingIterators));
 }
 
 } // namespace
@@ -147,6 +148,12 @@ void Dex::buildIndex() {
         {TokenToPostingList.first, PostingList(TokenToPostingList.second)});
 }
 
+std::unique_ptr<Iterator> Dex::iterator(const Token &Tok) const {
+  auto It = InvertedIndex.find(Tok);
+  return It == InvertedIndex.end() ? Corpus.none()
+                                   : It->second.iterator(&It->first);
+}
+
 /// Constructs iterators over tokens extracted from the query and exhausts it
 /// while applying Callback to each symbol in the order of decreasing quality
 /// of the matched symbols.
@@ -160,64 +167,40 @@ bool Dex::fuzzyFind(const FuzzyFindReque
   // Prevent clients from postfiltering them for longer queries.
   bool More = !Req.Query.empty() && Req.Query.size() < 3;
 
-  std::vector<std::unique_ptr<Iterator>> TopLevelChildren;
+  std::vector<std::unique_ptr<Iterator>> Criteria;
   const auto TrigramTokens = generateQueryTrigrams(Req.Query);
 
   // Generate query trigrams and construct AND iterator over all query
   // trigrams.
   std::vector<std::unique_ptr<Iterator>> TrigramIterators;
-  for (const auto &Trigram : TrigramTokens) {
-    const auto It = InvertedIndex.find(Trigram);
-    if (It != InvertedIndex.end())
-      TrigramIterators.push_back(It->second.iterator(&It->first));
-  }
-  if (!TrigramIterators.empty())
-    TopLevelChildren.push_back(Corpus.intersect(move(TrigramIterators)));
+  for (const auto &Trigram : TrigramTokens)
+    TrigramIterators.push_back(iterator(Trigram));
+  Criteria.push_back(Corpus.intersect(move(TrigramIterators)));
 
   // Generate scope tokens for search query.
   std::vector<std::unique_ptr<Iterator>> ScopeIterators;
-  for (const auto &Scope : Req.Scopes) {
-    Token Tok(Token::Kind::Scope, Scope);
-    const auto It = InvertedIndex.find(Tok);
-    if (It != InvertedIndex.end())
-      ScopeIterators.push_back(It->second.iterator(&It->first));
-  }
-  if (Req.AnyScope)
+  for (const auto &Scope : Req.Scopes)
+    ScopeIterators.push_back(iterator(Token(Token::Kind::Scope, Scope)));
+  if (Req.AnyScope || /*legacy*/ Req.Scopes.empty())
     ScopeIterators.push_back(
         Corpus.boost(Corpus.all(), ScopeIterators.empty() ? 1.0 : 0.2));
+  Criteria.push_back(Corpus.unionOf(move(ScopeIterators)));
 
-  // Add OR iterator for scopes if there are any Scope Iterators.
-  if (!ScopeIterators.empty())
-    TopLevelChildren.push_back(Corpus.unionOf(move(ScopeIterators)));
-
-  // Add proximity paths boosting.
-  auto BoostingIterators = createFileProximityIterators(
-      Req.ProximityPaths, URISchemes, InvertedIndex, Corpus);
-  // Boosting iterators do not actually filter symbols. In order to preserve
-  // the validity of resulting query, TRUE iterator should be added along
-  // BOOSTs.
-  if (!BoostingIterators.empty()) {
-    BoostingIterators.push_back(Corpus.all());
-    TopLevelChildren.push_back(Corpus.unionOf(move(BoostingIterators)));
-  }
+  // Add proximity paths boosting (all symbols, some boosted).
+  Criteria.push_back(createFileProximityIterator(Req.ProximityPaths, URISchemes,
+                                                 InvertedIndex, Corpus));
 
   if (Req.RestrictForCodeCompletion)
-    TopLevelChildren.push_back(
-        InvertedIndex.find(RestrictedForCodeCompletion)
-            ->second.iterator(&RestrictedForCodeCompletion));
+    Criteria.push_back(iterator(RestrictedForCodeCompletion));
 
   // Use TRUE iterator if both trigrams and scopes from the query are not
   // present in the symbol index.
-  auto QueryIterator = TopLevelChildren.empty()
-                           ? Corpus.all()
-                           : Corpus.intersect(move(TopLevelChildren));
+  auto Root = Corpus.intersect(move(Criteria));
   // Retrieve more items than it was requested: some of  the items with high
   // final score might not be retrieved otherwise.
-  // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as
-  // using 100x of the requested number might not be good in practice, e.g.
-  // when the requested number of items is small.
-  auto Root = Req.Limit ? Corpus.limit(move(QueryIterator), *Req.Limit * 100)
-                        : move(QueryIterator);
+  // FIXME(kbobyrev): Tune this ratio.
+  if (Req.Limit)
+    Root = Corpus.limit(move(Root), *Req.Limit * 100);
   SPAN_ATTACH(Tracer, "query", llvm::to_string(*Root));
   vlog("Dex query tree: {0}", *Root);
 

Modified: clang-tools-extra/trunk/clangd/index/dex/Dex.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/Dex.h?rev=343802&r1=343801&r2=343802&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/Dex.h (original)
+++ clang-tools-extra/trunk/clangd/index/dex/Dex.h Thu Oct  4 10:18:55 2018
@@ -87,6 +87,7 @@ public:
 
 private:
   void buildIndex();
+  std::unique_ptr<Iterator> iterator(const Token &Tok) const;
 
   /// Stores symbols sorted in the descending order of symbol quality..
   std::vector<const Symbol *> Symbols;

Modified: clang-tools-extra/trunk/clangd/index/dex/Iterator.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/Iterator.h?rev=343802&r1=343801&r2=343802&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/Iterator.h (original)
+++ clang-tools-extra/trunk/clangd/index/dex/Iterator.h Thu Oct  4 10:18:55 2018
@@ -106,7 +106,6 @@ protected:
   Iterator(Kind MyKind = Kind::Other) : MyKind(MyKind) {}
 
 private:
-  Kind MyKind;
   virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0;
   Kind MyKind;
 };

Modified: clang-tools-extra/trunk/clangd/index/dex/Trigram.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/Trigram.cpp?rev=343802&r1=343801&r2=343802&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/Trigram.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/dex/Trigram.cpp Thu Oct  4 10:18:55 2018
@@ -87,6 +87,8 @@ std::vector<Token> generateIdentifierTri
 }
 
 std::vector<Token> generateQueryTrigrams(llvm::StringRef Query) {
+  if (Query.empty())
+    return {};
   std::string LowercaseQuery = Query.lower();
   if (Query.size() < 3) // short-query trigrams only
     return {Token(Token::Kind::Trigram, LowercaseQuery)};

Modified: clang-tools-extra/trunk/unittests/clangd/DexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/DexTests.cpp?rev=343802&r1=343801&r2=343802&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/DexTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/DexTests.cpp Thu Oct  4 10:18:55 2018
@@ -408,6 +408,7 @@ TEST(DexTrigrams, QueryTrigrams) {
   EXPECT_THAT(generateQueryTrigrams("cl"), trigramsAre({"cl"}));
   EXPECT_THAT(generateQueryTrigrams("cla"), trigramsAre({"cla"}));
 
+  EXPECT_THAT(generateQueryTrigrams(""), trigramsAre({}));
   EXPECT_THAT(generateQueryTrigrams("_"), trigramsAre({"_"}));
   EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__"}));
   EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({}));
@@ -526,8 +527,6 @@ TEST(DexTest, FuzzyMatch) {
               UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
 }
 
-// TODO(sammccall): enable after D52796 bugfix.
-#if 0
 TEST(DexTest, ShortQuery) {
   auto I =
       Dex::build(generateSymbols({"OneTwoThreeFour"}), RefSlab(), URISchemes);
@@ -549,7 +548,6 @@ TEST(DexTest, ShortQuery) {
   EXPECT_THAT(match(*I, Req, &Incomplete), ElementsAre("OneTwoThreeFour"));
   EXPECT_FALSE(Incomplete) << "3-char string is not a short query";
 }
-#endif
 
 TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) {
   auto I = Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), RefSlab(),
@@ -614,6 +612,15 @@ TEST(DexTest, IgnoreCases) {
   EXPECT_THAT(match(*I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
 }
 
+TEST(DexTest, UnknownPostingList) {
+  // Regression test: we used to ignore unknown scopes and accept any symbol.
+  auto I = Dex::build(generateSymbols({"ns::ABC", "ns::abc"}), RefSlab(),
+                      URISchemes);
+  FuzzyFindRequest Req;
+  Req.Scopes = {"ns2::"};
+  EXPECT_THAT(match(*I, Req), UnorderedElementsAre());
+}
+
 TEST(DexTest, Lookup) {
   auto I = Dex::build(generateSymbols({"ns::abc", "ns::xyz"}), RefSlab(), URISchemes);
   EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));




More information about the cfe-commits mailing list