[clang-tools-extra] r340605 - [clangd] Implement LIMIT iterator

Kirill Bobyrev via cfe-commits cfe-commits at lists.llvm.org
Fri Aug 24 04:25:43 PDT 2018


Author: omtcyfz
Date: Fri Aug 24 04:25:43 2018
New Revision: 340605

URL: http://llvm.org/viewvc/llvm-project?rev=340605&view=rev
Log:
[clangd] Implement LIMIT iterator

This patch introduces LIMIT iterator, which is very important for
improving the quality of search query. LIMIT iterators can be applied on
top of BOOST iterators to prevent populating query request with a huge
number of low-quality symbols.

Reviewed by: sammccall

Differential Revision: https://reviews.llvm.org/D51029

Modified:
    clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp
    clang-tools-extra/trunk/clangd/index/dex/Iterator.cpp
    clang-tools-extra/trunk/clangd/index/dex/Iterator.h
    clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp

Modified: clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp?rev=340605&r1=340604&r2=340605&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp Fri Aug 24 04:25:43 2018
@@ -128,10 +128,10 @@ bool DexIndex::fuzzyFind(
     // using 100x of the requested number might not be good in practice, e.g.
     // when the requested number of items is small.
     const unsigned ItemsToRetrieve = 100 * Req.MaxCandidateCount;
+    auto Root = createLimit(move(QueryIterator), ItemsToRetrieve);
     // FIXME(kbobyrev): Add boosting to the query and utilize retrieved
     // boosting scores.
-    std::vector<std::pair<DocID, float>> SymbolDocIDs =
-        consume(*QueryIterator, ItemsToRetrieve);
+    std::vector<std::pair<DocID, float>> SymbolDocIDs = consume(*Root);
 
     // Retrieve top Req.MaxCandidateCount items.
     std::priority_queue<std::pair<float, const Symbol *>> Top;

Modified: clang-tools-extra/trunk/clangd/index/dex/Iterator.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/Iterator.cpp?rev=340605&r1=340604&r2=340605&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/Iterator.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/dex/Iterator.cpp Fri Aug 24 04:25:43 2018
@@ -46,7 +46,7 @@ public:
     return *Index;
   }
 
-  float consume(DocID ID) override { return DEFAULT_BOOST_SCORE; }
+  float consume() override { return DEFAULT_BOOST_SCORE; }
 
 private:
   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
@@ -105,16 +105,12 @@ public:
 
   DocID peek() const override { return Children.front()->peek(); }
 
-  // If not exhausted and points to the given item, consume() returns the
-  // product of Children->consume(ID). Otherwise, DEFAULT_BOOST_SCORE is
-  // returned.
-  float consume(DocID ID) override {
-    if (reachedEnd() || peek() != ID)
-      return DEFAULT_BOOST_SCORE;
+  float consume() override {
+    assert(!reachedEnd() && "AndIterator can't consume() at the end.");
     return std::accumulate(
         begin(Children), end(Children), DEFAULT_BOOST_SCORE,
         [&](float Current, const std::unique_ptr<Iterator> &Child) {
-          return Current * Child->consume(ID);
+          return Current * Child->consume();
         });
   }
 
@@ -226,15 +222,16 @@ public:
 
   // Returns the maximum boosting score among all Children when iterator is not
   // exhausted and points to the given ID, DEFAULT_BOOST_SCORE otherwise.
-  float consume(DocID ID) override {
-    if (reachedEnd() || peek() != ID)
-      return DEFAULT_BOOST_SCORE;
+  float consume() override {
+    assert(!reachedEnd() &&
+           "OrIterator can't consume() after it reached the end.");
+    const DocID ID = peek();
     return std::accumulate(
         begin(Children), end(Children), DEFAULT_BOOST_SCORE,
-        [&](float Current, const std::unique_ptr<Iterator> &Child) {
+        [&](float Boost, const std::unique_ptr<Iterator> &Child) {
           return (!Child->reachedEnd() && Child->peek() == ID)
-                     ? std::max(Current, Child->consume(ID))
-                     : Current;
+                     ? std::max(Boost, Child->consume())
+                     : Boost;
         });
   }
 
@@ -278,7 +275,7 @@ public:
     return Index;
   }
 
-  float consume(DocID) override { return DEFAULT_BOOST_SCORE; }
+  float consume() override { return DEFAULT_BOOST_SCORE; }
 
 private:
   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
@@ -306,7 +303,7 @@ public:
 
   DocID peek() const override { return Child->peek(); }
 
-  float consume(DocID ID) override { return Child->consume(ID) * Factor; }
+  float consume() override { return Child->consume() * Factor; }
 
 private:
   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
@@ -318,15 +315,50 @@ private:
   float Factor;
 };
 
+/// This iterator limits the number of items retrieved from the child iterator
+/// on top of the query tree. To ensure that query tree with LIMIT iterators
+/// inside works correctly, users have to call Root->consume(Root->peek()) each
+/// time item is retrieved at the root of query tree.
+class LimitIterator : public Iterator {
+public:
+  LimitIterator(std::unique_ptr<Iterator> Child, size_t Limit)
+      : Child(move(Child)), Limit(Limit), ItemsLeft(Limit) {}
+
+  bool reachedEnd() const override {
+    return ItemsLeft == 0 || Child->reachedEnd();
+  }
+
+  void advance() override { Child->advance(); }
+
+  void advanceTo(DocID ID) override { Child->advanceTo(ID); }
+
+  DocID peek() const override { return Child->peek(); }
+
+  /// Decreases the limit in case the element consumed at top of the query tree
+  /// comes from the underlying iterator.
+  float consume() override {
+    assert(!reachedEnd() && "LimitIterator can't consume at the end.");
+    --ItemsLeft;
+    return Child->consume();
+  }
+
+private:
+  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
+    OS << "(LIMIT " << Limit << '(' << ItemsLeft << ") " << *Child << ')';
+    return OS;
+  }
+
+  std::unique_ptr<Iterator> Child;
+  size_t Limit;
+  size_t ItemsLeft;
+};
+
 } // end namespace
 
-std::vector<std::pair<DocID, float>> consume(Iterator &It, size_t Limit) {
+std::vector<std::pair<DocID, float>> consume(Iterator &It) {
   std::vector<std::pair<DocID, float>> Result;
-  for (size_t Retrieved = 0; !It.reachedEnd() && Retrieved < Limit;
-       It.advance(), ++Retrieved) {
-    DocID Document = It.peek();
-    Result.push_back(std::make_pair(Document, It.consume(Document)));
-  }
+  for (; !It.reachedEnd(); It.advance())
+    Result.push_back(std::make_pair(It.peek(), It.consume()));
   return Result;
 }
 
@@ -353,6 +385,11 @@ std::unique_ptr<Iterator> createBoost(st
   return llvm::make_unique<BoostIterator>(move(Child), Factor);
 }
 
+std::unique_ptr<Iterator> createLimit(std::unique_ptr<Iterator> Child,
+                                      size_t Size) {
+  return llvm::make_unique<LimitIterator>(move(Child), Size);
+}
+
 } // namespace dex
 } // namespace clangd
 } // namespace clang

Modified: clang-tools-extra/trunk/clangd/index/dex/Iterator.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/Iterator.h?rev=340605&r1=340604&r2=340605&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/Iterator.h (original)
+++ clang-tools-extra/trunk/clangd/index/dex/Iterator.h Fri Aug 24 04:25:43 2018
@@ -87,13 +87,14 @@ public:
   ///
   /// Note: reachedEnd() must be false.
   virtual DocID peek() const = 0;
-  /// Retrieves boosting score. Query tree root should pass Root->peek() to this
-  /// function, the parameter is needed to propagate through the tree. Given ID
-  /// should be compared against BOOST iterator peek()s: some of the iterators
-  /// would not point to the item which was propagated to the top of the query
-  /// tree (e.g. if these iterators are branches of OR iterator) and hence
-  /// shouldn't apply any boosting to the consumed item.
-  virtual float consume(DocID ID) = 0;
+  /// Informs the iterator that the current document was consumed, and returns
+  /// its boost.
+  ///
+  /// Note: If this iterator has any child iterators that contain the document,
+  /// consume() should be called on those and their boosts incorporated.
+  /// consume() must *not* be called on children that don't contain the current
+  /// doc.
+  virtual float consume() = 0;
 
   virtual ~Iterator() {}
 
@@ -118,17 +119,15 @@ private:
   virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0;
 };
 
-/// Advances the iterator until it is either exhausted or the number of
-/// requested items is reached. Returns pairs of document IDs with the
-/// corresponding boosting score.
+/// Advances the iterator until it is exhausted. Returns pairs of document IDs
+/// with the corresponding boosting score.
 ///
 /// Boosting can be seen as a compromise between retrieving too many items and
 /// calculating finals score for each of them (which might be very expensive)
 /// and not retrieving enough items so that items with very high final score
 /// would not be processed. Boosting score is a computationally efficient way
 /// to acquire preliminary scores of requested items.
-std::vector<std::pair<DocID, float>>
-consume(Iterator &It, size_t Limit = std::numeric_limits<size_t>::max());
+std::vector<std::pair<DocID, float>> consume(Iterator &It);
 
 /// Returns a document iterator over given PostingList.
 ///
@@ -165,6 +164,13 @@ std::unique_ptr<Iterator> createTrue(Doc
 std::unique_ptr<Iterator> createBoost(std::unique_ptr<Iterator> Child,
                                       float Factor);
 
+/// Returns LIMIT iterator, which yields up to N elements of its child iterator.
+/// Elements only count towards the limit if they are part of the final result
+/// set. Therefore the following iterator (AND (2) (LIMIT (1 2) 1)) yields (2),
+/// not ().
+std::unique_ptr<Iterator> createLimit(std::unique_ptr<Iterator> Child,
+                                      size_t Limit);
+
 /// This allows createAnd(create(...), create(...)) syntax.
 template <typename... Args> std::unique_ptr<Iterator> createAnd(Args... args) {
   std::vector<std::unique_ptr<Iterator>> Children;

Modified: clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp?rev=340605&r1=340604&r2=340605&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp Fri Aug 24 04:25:43 2018
@@ -29,9 +29,8 @@ namespace clangd {
 namespace dex {
 namespace {
 
-std::vector<DocID>
-consumeIDs(Iterator &It, size_t Limit = std::numeric_limits<size_t>::max()) {
-  auto IDAndScore = consume(It, Limit);
+std::vector<DocID> consumeIDs(Iterator &It) {
+  auto IDAndScore = consume(It);
   std::vector<DocID> IDs(IDAndScore.size());
   for (size_t I = 0; I < IDAndScore.size(); ++I)
     IDs[I] = IDAndScore[I].first;
@@ -234,13 +233,13 @@ TEST(DexIndexIterators, QueryTree) {
   Root->advanceTo(1);
   Root->advanceTo(0);
   EXPECT_EQ(Root->peek(), 1U);
-  auto ElementBoost = Root->consume(Root->peek());
+  auto ElementBoost = Root->consume();
   EXPECT_THAT(ElementBoost, 6);
   Root->advance();
   EXPECT_EQ(Root->peek(), 5U);
   Root->advanceTo(5);
   EXPECT_EQ(Root->peek(), 5U);
-  ElementBoost = Root->consume(Root->peek());
+  ElementBoost = Root->consume();
   EXPECT_THAT(ElementBoost, 8);
   Root->advanceTo(9000);
   EXPECT_TRUE(Root->reachedEnd());
@@ -265,24 +264,23 @@ TEST(DexIndexIterators, StringRepresenta
 }
 
 TEST(DexIndexIterators, Limit) {
-  const PostingList L0 = {4, 7, 8, 20, 42, 100};
-  const PostingList L1 = {1, 3, 5, 8, 9};
-  const PostingList L2 = {1, 5, 7, 9};
-  const PostingList L3 = {0, 5};
-  const PostingList L4 = {0, 1, 5};
-  const PostingList L5;
-
-  auto DocIterator = create(L0);
-  EXPECT_THAT(consumeIDs(*DocIterator, 42), ElementsAre(4, 7, 8, 20, 42, 100));
-
-  DocIterator = create(L0);
-  EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(4, 7, 8, 20, 42, 100));
-
-  DocIterator = create(L0);
-  EXPECT_THAT(consumeIDs(*DocIterator, 3), ElementsAre(4, 7, 8));
-
-  DocIterator = create(L0);
-  EXPECT_THAT(consumeIDs(*DocIterator, 0), ElementsAre());
+  const PostingList L0 = {3, 6, 7, 20, 42, 100};
+  const PostingList L1 = {1, 3, 5, 6, 7, 30, 100};
+  const PostingList L2 = {0, 3, 5, 7, 8, 100};
+
+  auto DocIterator = createLimit(create(L0), 42);
+  EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7, 20, 42, 100));
+
+  DocIterator = createLimit(create(L0), 3);
+  EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7));
+
+  DocIterator = createLimit(create(L0), 0);
+  EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre());
+
+  auto AndIterator =
+      createAnd(createLimit(createTrue(9000), 343), createLimit(create(L0), 2),
+                createLimit(create(L1), 3), createLimit(create(L2), 42));
+  EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(3, 7));
 }
 
 TEST(DexIndexIterators, True) {
@@ -301,7 +299,7 @@ TEST(DexIndexIterators, True) {
 TEST(DexIndexIterators, Boost) {
   auto BoostIterator = createBoost(createTrue(5U), 42U);
   EXPECT_FALSE(BoostIterator->reachedEnd());
-  auto ElementBoost = BoostIterator->consume(BoostIterator->peek());
+  auto ElementBoost = BoostIterator->consume();
   EXPECT_THAT(ElementBoost, 42U);
 
   const PostingList L0 = {2, 4};
@@ -309,20 +307,20 @@ TEST(DexIndexIterators, Boost) {
   auto Root = createOr(createTrue(5U), createBoost(create(L0), 2U),
                        createBoost(create(L1), 3U));
 
-  ElementBoost = Root->consume(Root->peek());
+  ElementBoost = Root->consume();
   EXPECT_THAT(ElementBoost, Iterator::DEFAULT_BOOST_SCORE);
   Root->advance();
   EXPECT_THAT(Root->peek(), 1U);
-  ElementBoost = Root->consume(Root->peek());
+  ElementBoost = Root->consume();
   EXPECT_THAT(ElementBoost, 3);
 
   Root->advance();
   EXPECT_THAT(Root->peek(), 2U);
-  ElementBoost = Root->consume(Root->peek());
+  ElementBoost = Root->consume();
   EXPECT_THAT(ElementBoost, 2);
 
   Root->advanceTo(4);
-  ElementBoost = Root->consume(Root->peek());
+  ElementBoost = Root->consume();
   EXPECT_THAT(ElementBoost, 3);
 }
 




More information about the cfe-commits mailing list