[clang-tools-extra] r341784 - [clangd] NFC: Rename DexIndex to Dex
Kirill Bobyrev via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 10 01:23:53 PDT 2018
Author: omtcyfz
Date: Mon Sep 10 01:23:53 2018
New Revision: 341784
URL: http://llvm.org/viewvc/llvm-project?rev=341784&view=rev
Log:
[clangd] NFC: Rename DexIndex to Dex
Also, cleanup some redundant includes.
Reviewed By: sammccall
Differential Revision: https://reviews.llvm.org/D51774
Added:
clang-tools-extra/trunk/clangd/index/dex/Dex.cpp
clang-tools-extra/trunk/clangd/index/dex/Dex.h
clang-tools-extra/trunk/unittests/clangd/DexTests.cpp
Removed:
clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp
clang-tools-extra/trunk/clangd/index/dex/DexIndex.h
clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp
Modified:
clang-tools-extra/trunk/clangd/CMakeLists.txt
clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp
clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp
clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
clang-tools-extra/trunk/unittests/clangd/TestIndex.h
Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CMakeLists.txt?rev=341784&r1=341783&r2=341784&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/clangd/CMakeLists.txt Mon Sep 10 01:23:53 2018
@@ -46,7 +46,7 @@ add_clang_library(clangDaemon
index/SymbolCollector.cpp
index/SymbolYAML.cpp
- index/dex/DexIndex.cpp
+ index/dex/Dex.cpp
index/dex/Iterator.cpp
index/dex/Trigram.cpp
Modified: clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp?rev=341784&r1=341783&r2=341784&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/SymbolYAML.cpp Mon Sep 10 01:23:53 2018
@@ -11,7 +11,7 @@
#include "Index.h"
#include "Serialization.h"
#include "Trace.h"
-#include "dex/DexIndex.h"
+#include "dex/Dex.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Errc.h"
@@ -225,7 +225,7 @@ std::unique_ptr<SymbolIndex> loadIndex(l
if (!Slab)
return nullptr;
trace::Span Tracer("BuildIndex");
- return UseDex ? dex::DexIndex::build(std::move(*Slab), URISchemes)
+ return UseDex ? dex::Dex::build(std::move(*Slab), URISchemes)
: MemIndex::build(std::move(*Slab), RefSlab());
}
Added: clang-tools-extra/trunk/clangd/index/dex/Dex.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/Dex.cpp?rev=341784&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/Dex.cpp (added)
+++ clang-tools-extra/trunk/clangd/index/dex/Dex.cpp Mon Sep 10 01:23:53 2018
@@ -0,0 +1,270 @@
+//===--- Dex.cpp - Dex Symbol Index Implementation --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Dex.h"
+#include "FileDistance.h"
+#include "FuzzyMatch.h"
+#include "Logger.h"
+#include "Quality.h"
+#include "llvm/ADT/StringSet.h"
+#include <algorithm>
+#include <queue>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+namespace {
+
+// Returns the tokens which are given symbol's characteristics. Currently, the
+// generated tokens only contain fuzzy matching trigrams and symbol's scope,
+// but in the future this will also return path proximity tokens and other
+// types of tokens such as symbol type (if applicable).
+// Returns the tokens which are given symbols's characteristics. For example,
+// trigrams and scopes.
+// FIXME(kbobyrev): Support more token types:
+// * Types
+// * Namespace proximity
+std::vector<Token> generateSearchTokens(const Symbol &Sym) {
+ std::vector<Token> Result = generateIdentifierTrigrams(Sym.Name);
+ Result.emplace_back(Token::Kind::Scope, Sym.Scope);
+ // Skip token generation for symbols with unknown declaration location.
+ if (!Sym.CanonicalDeclaration.FileURI.empty())
+ for (const auto &ProximityURI :
+ generateProximityURIs(Sym.CanonicalDeclaration.FileURI))
+ Result.emplace_back(Token::Kind::ProximityURI, ProximityURI);
+ return Result;
+}
+
+// Constructs BOOST iterators for Path Proximities.
+std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(
+ llvm::ArrayRef<std::string> ProximityPaths,
+ llvm::ArrayRef<std::string> URISchemes,
+ const llvm::DenseMap<Token, PostingList> &InvertedIndex) {
+ std::vector<std::unique_ptr<Iterator>> BoostingIterators;
+ // Deduplicate parent URIs extracted from the ProximityPaths.
+ llvm::StringSet<> ParentURIs;
+ llvm::StringMap<SourceParams> Sources;
+ for (const auto &Path : ProximityPaths) {
+ Sources[Path] = SourceParams();
+ auto PathURI = URI::create(Path, URISchemes);
+ if (!PathURI) {
+ elog("Given ProximityPath {0} is can not be converted to any known URI "
+ "scheme. fuzzyFind request will ignore it.",
+ Path);
+ llvm::consumeError(PathURI.takeError());
+ continue;
+ }
+ const auto PathProximityURIs = generateProximityURIs(PathURI->toString());
+ for (const auto &ProximityURI : PathProximityURIs)
+ ParentURIs.insert(ProximityURI);
+ }
+ // Use SymbolRelevanceSignals for symbol relevance evaluation: use defaults
+ // for all parameters except for Proximity Path distance signal.
+ SymbolRelevanceSignals PathProximitySignals;
+ // DistanceCalculator will find the shortest distance from ProximityPaths to
+ // any URI extracted from the ProximityPaths.
+ URIDistance DistanceCalculator(Sources);
+ PathProximitySignals.FileProximityMatch = &DistanceCalculator;
+ // Try to build BOOST iterator for each Proximity Path provided by
+ // ProximityPaths. Boosting factor should depend on the distance to the
+ // Proximity Path: the closer processed path is, the higher boosting factor.
+ for (const auto &ParentURI : ParentURIs.keys()) {
+ const auto It =
+ InvertedIndex.find(Token(Token::Kind::ProximityURI, ParentURI));
+ if (It != InvertedIndex.end()) {
+ // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator.
+ PathProximitySignals.SymbolURI = ParentURI;
+ BoostingIterators.push_back(
+ createBoost(create(It->second), PathProximitySignals.evaluate()));
+ }
+ }
+ return BoostingIterators;
+}
+
+} // namespace
+
+void Dex::buildIndex() {
+ std::vector<std::pair<float, const Symbol *>> ScoredSymbols(Symbols.size());
+
+ for (size_t I = 0; I < Symbols.size(); ++I) {
+ const Symbol *Sym = Symbols[I];
+ LookupTable[Sym->ID] = Sym;
+ ScoredSymbols[I] = {quality(*Sym), Sym};
+ }
+
+ // Symbols are sorted by symbol qualities so that items in the posting lists
+ // are stored in the descending order of symbol quality.
+ std::sort(begin(ScoredSymbols), end(ScoredSymbols),
+ std::greater<std::pair<float, const Symbol *>>());
+
+ // SymbolQuality was empty up until now.
+ SymbolQuality.resize(Symbols.size());
+ // Populate internal storage using Symbol + Score pairs.
+ for (size_t I = 0; I < ScoredSymbols.size(); ++I) {
+ SymbolQuality[I] = ScoredSymbols[I].first;
+ Symbols[I] = ScoredSymbols[I].second;
+ }
+
+ // Populate TempInvertedIndex with posting lists for index symbols.
+ for (DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank) {
+ const auto *Sym = Symbols[SymbolRank];
+ for (const auto &Token : generateSearchTokens(*Sym))
+ InvertedIndex[Token].push_back(SymbolRank);
+ }
+
+ vlog("Built Dex with estimated memory usage {0} bytes.",
+ estimateMemoryUsage());
+}
+
+/// Constructs iterators over tokens extracted from the query and exhausts it
+/// while applying Callback to each symbol in the order of decreasing quality
+/// of the matched symbols.
+bool Dex::fuzzyFind(const FuzzyFindRequest &Req,
+ llvm::function_ref<void(const Symbol &)> Callback) const {
+ assert(!StringRef(Req.Query).contains("::") &&
+ "There must be no :: in query.");
+ FuzzyMatcher Filter(Req.Query);
+ bool More = false;
+
+ std::vector<std::unique_ptr<Iterator>> TopLevelChildren;
+ const auto TrigramTokens = generateIdentifierTrigrams(Req.Query);
+
+ // Generate query trigrams and construct AND iterator over all query
+ // trigrams.
+ std::vector<std::unique_ptr<Iterator>> TrigramIterators;
+ for (const auto &Trigram : TrigramTokens) {
+ const auto It = InvertedIndex.find(Trigram);
+ if (It != InvertedIndex.end())
+ TrigramIterators.push_back(create(It->second));
+ }
+ if (!TrigramIterators.empty())
+ TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
+
+ // Generate scope tokens for search query.
+ std::vector<std::unique_ptr<Iterator>> ScopeIterators;
+ for (const auto &Scope : Req.Scopes) {
+ const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));
+ if (It != InvertedIndex.end())
+ ScopeIterators.push_back(create(It->second));
+ }
+ // Add OR iterator for scopes if there are any Scope Iterators.
+ if (!ScopeIterators.empty())
+ TopLevelChildren.push_back(createOr(move(ScopeIterators)));
+
+ // Add proximity paths boosting.
+ auto BoostingIterators = createFileProximityIterators(
+ Req.ProximityPaths, URISchemes, InvertedIndex);
+ // Boosting iterators do not actually filter symbols. In order to preserve
+ // the validity of resulting query, TRUE iterator should be added along
+ // BOOSTs.
+ if (!BoostingIterators.empty()) {
+ BoostingIterators.push_back(createTrue(Symbols.size()));
+ TopLevelChildren.push_back(createOr(move(BoostingIterators)));
+ }
+
+ // Use TRUE iterator if both trigrams and scopes from the query are not
+ // present in the symbol index.
+ auto QueryIterator = TopLevelChildren.empty()
+ ? createTrue(Symbols.size())
+ : createAnd(move(TopLevelChildren));
+ // Retrieve more items than it was requested: some of the items with high
+ // final score might not be retrieved otherwise.
+ // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as
+ // using 100x of the requested number might not be good in practice, e.g.
+ // when the requested number of items is small.
+ const size_t ItemsToRetrieve = 100 * Req.MaxCandidateCount;
+ auto Root = createLimit(move(QueryIterator), ItemsToRetrieve);
+
+ using IDAndScore = std::pair<DocID, float>;
+ std::vector<IDAndScore> IDAndScores = consume(*Root);
+
+ auto Compare = [](const IDAndScore &LHS, const IDAndScore &RHS) {
+ return LHS.second > RHS.second;
+ };
+ TopN<IDAndScore, decltype(Compare)> Top(Req.MaxCandidateCount, Compare);
+ for (const auto &IDAndScore : IDAndScores) {
+ const DocID SymbolDocID = IDAndScore.first;
+ const auto *Sym = Symbols[SymbolDocID];
+ const llvm::Optional<float> Score = Filter.match(Sym->Name);
+ if (!Score)
+ continue;
+ // Combine Fuzzy Matching score, precomputed symbol quality and boosting
+ // score for a cumulative final symbol score.
+ const float FinalScore =
+ (*Score) * SymbolQuality[SymbolDocID] * IDAndScore.second;
+ // If Top.push(...) returns true, it means that it had to pop an item. In
+ // this case, it is possible to retrieve more symbols.
+ if (Top.push({SymbolDocID, FinalScore}))
+ More = true;
+ }
+
+ // Apply callback to the top Req.MaxCandidateCount items in the descending
+ // order of cumulative score.
+ for (const auto &Item : std::move(Top).items())
+ Callback(*Symbols[Item.first]);
+ return More;
+}
+
+void Dex::lookup(const LookupRequest &Req,
+ llvm::function_ref<void(const Symbol &)> Callback) const {
+ for (const auto &ID : Req.IDs) {
+ auto I = LookupTable.find(ID);
+ if (I != LookupTable.end())
+ Callback(*I->second);
+ }
+}
+
+void Dex::refs(const RefsRequest &Req,
+ llvm::function_ref<void(const Ref &)> Callback) const {
+ log("refs is not implemented.");
+}
+
+size_t Dex::estimateMemoryUsage() const {
+ size_t Bytes =
+ LookupTable.size() * sizeof(std::pair<SymbolID, const Symbol *>);
+ Bytes += SymbolQuality.size() * sizeof(std::pair<const Symbol *, float>);
+ Bytes += InvertedIndex.size() * sizeof(Token);
+
+ for (const auto &P : InvertedIndex) {
+ Bytes += P.second.size() * sizeof(DocID);
+ }
+ return Bytes;
+}
+
+std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath) {
+ std::vector<std::string> Result;
+ auto ParsedURI = URI::parse(URIPath);
+ assert(ParsedURI &&
+ "Non-empty argument of generateProximityURIs() should be a valid "
+ "URI.");
+ StringRef Body = ParsedURI->body();
+ // FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum
+ // size of resulting vector. Some projects might want to have higher limit if
+ // the file hierarchy is deeper. For the generic case, it would be useful to
+ // calculate Limit in the index build stage by calculating the maximum depth
+ // of the project source tree at runtime.
+ size_t Limit = 5;
+ // Insert original URI before the loop: this would save a redundant iteration
+ // with a URI parse.
+ Result.emplace_back(ParsedURI->toString());
+ while (!Body.empty() && --Limit > 0) {
+ // FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and
+ // could be optimized.
+ Body = llvm::sys::path::parent_path(Body, llvm::sys::path::Style::posix);
+ URI TokenURI(ParsedURI->scheme(), ParsedURI->authority(), Body);
+ if (!Body.empty())
+ Result.emplace_back(TokenURI.toString());
+ }
+ return Result;
+}
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
Added: clang-tools-extra/trunk/clangd/index/dex/Dex.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/Dex.h?rev=341784&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/Dex.h (added)
+++ clang-tools-extra/trunk/clangd/index/dex/Dex.h Mon Sep 10 01:23:53 2018
@@ -0,0 +1,113 @@
+//===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines Dex - a symbol index implementation based on query iterators
+// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
+// While consuming more memory and having longer build stage due to
+// preprocessing, Dex will have substantially lower latency. It will also allow
+// efficient symbol searching which is crucial for operations like code
+// completion, and can be very important for a number of different code
+// transformations which will be eventually supported by Clangd.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
+
+#include "Iterator.h"
+#include "Token.h"
+#include "Trigram.h"
+#include "index/Index.h"
+#include "index/MemIndex.h"
+#include "index/SymbolCollector.h"
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+/// In-memory Dex trigram-based index implementation.
+// FIXME(kbobyrev): Introduce serialization and deserialization of the symbol
+// index so that it can be loaded from the disk. Since static index is not
+// changed frequently, it's safe to assume that it has to be built only once
+// (when the clangd process starts). Therefore, it can be easier to store built
+// index on disk and then load it if available.
+class Dex : public SymbolIndex {
+public:
+ // All symbols must outlive this index.
+ template <typename Range>
+ Dex(Range &&Symbols, llvm::ArrayRef<std::string> Schemes)
+ : URISchemes(Schemes) {
+ // If Schemes don't contain any items, fall back to SymbolCollector's
+ // default URI schemes.
+ if (URISchemes.empty()) {
+ SymbolCollector::Options Opts;
+ URISchemes = Opts.URISchemes;
+ }
+ for (auto &&Sym : Symbols)
+ this->Symbols.push_back(&Sym);
+ buildIndex();
+ }
+ // Symbols are owned by BackingData, Index takes ownership.
+ template <typename Range, typename Payload>
+ Dex(Range &&Symbols, Payload &&BackingData,
+ llvm::ArrayRef<std::string> URISchemes)
+ : Dex(std::forward<Range>(Symbols), URISchemes) {
+ KeepAlive = std::shared_ptr<void>(
+ std::make_shared<Payload>(std::move(BackingData)), nullptr);
+ }
+
+ /// Builds an index from a slab. The index takes ownership of the slab.
+ static std::unique_ptr<SymbolIndex>
+ build(SymbolSlab Slab, llvm::ArrayRef<std::string> URISchemes) {
+ return llvm::make_unique<Dex>(Slab, std::move(Slab), URISchemes);
+ }
+
+ bool
+ fuzzyFind(const FuzzyFindRequest &Req,
+ llvm::function_ref<void(const Symbol &)> Callback) const override;
+
+ void lookup(const LookupRequest &Req,
+ llvm::function_ref<void(const Symbol &)> Callback) const override;
+
+ void refs(const RefsRequest &Req,
+ llvm::function_ref<void(const Ref &)> Callback) const override;
+
+ size_t estimateMemoryUsage() const override;
+
+private:
+ void buildIndex();
+
+ /// Stores symbols sorted in the descending order of symbol quality..
+ std::vector<const Symbol *> Symbols;
+ /// SymbolQuality[I] is the quality of Symbols[I].
+ std::vector<float> SymbolQuality;
+ llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
+ /// Inverted index is a mapping from the search token to the posting list,
+ /// which contains all items which can be characterized by such search token.
+ /// For example, if the search token is scope "std::", the corresponding
+ /// posting list would contain all indices of symbols defined in namespace
+ /// std. Inverted index is used to retrieve posting lists which are processed
+ /// during the fuzzyFind process.
+ llvm::DenseMap<Token, PostingList> InvertedIndex;
+ std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
+
+ std::vector<std::string> URISchemes;
+};
+
+/// Returns Search Token for a number of parent directories of given Path.
+/// Should be used within the index build process.
+///
+/// This function is exposed for testing only.
+std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
Removed: clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp?rev=341783&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp (original)
+++ clang-tools-extra/trunk/clangd/index/dex/DexIndex.cpp (removed)
@@ -1,271 +0,0 @@
-//===--- DexIndex.cpp - Dex Symbol Index Implementation ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "DexIndex.h"
-#include "FileDistance.h"
-#include "FuzzyMatch.h"
-#include "Logger.h"
-#include "Quality.h"
-#include "llvm/ADT/StringSet.h"
-#include <algorithm>
-#include <queue>
-
-namespace clang {
-namespace clangd {
-namespace dex {
-
-namespace {
-
-// Returns the tokens which are given symbol's characteristics. Currently, the
-// generated tokens only contain fuzzy matching trigrams and symbol's scope,
-// but in the future this will also return path proximity tokens and other
-// types of tokens such as symbol type (if applicable).
-// Returns the tokens which are given symbols's characteristics. For example,
-// trigrams and scopes.
-// FIXME(kbobyrev): Support more token types:
-// * Types
-// * Namespace proximity
-std::vector<Token> generateSearchTokens(const Symbol &Sym) {
- std::vector<Token> Result = generateIdentifierTrigrams(Sym.Name);
- Result.emplace_back(Token::Kind::Scope, Sym.Scope);
- // Skip token generation for symbols with unknown declaration location.
- if (!Sym.CanonicalDeclaration.FileURI.empty())
- for (const auto &ProximityURI :
- generateProximityURIs(Sym.CanonicalDeclaration.FileURI))
- Result.emplace_back(Token::Kind::ProximityURI, ProximityURI);
- return Result;
-}
-
-// Constructs BOOST iterators for Path Proximities.
-std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(
- llvm::ArrayRef<std::string> ProximityPaths,
- llvm::ArrayRef<std::string> URISchemes,
- const llvm::DenseMap<Token, PostingList> &InvertedIndex) {
- std::vector<std::unique_ptr<Iterator>> BoostingIterators;
- // Deduplicate parent URIs extracted from the ProximityPaths.
- llvm::StringSet<> ParentURIs;
- llvm::StringMap<SourceParams> Sources;
- for (const auto &Path : ProximityPaths) {
- Sources[Path] = SourceParams();
- auto PathURI = URI::create(Path, URISchemes);
- if (!PathURI) {
- elog("Given ProximityPath {0} is can not be converted to any known URI "
- "scheme. fuzzyFind request will ignore it.",
- Path);
- llvm::consumeError(PathURI.takeError());
- continue;
- }
- const auto PathProximityURIs = generateProximityURIs(PathURI->toString());
- for (const auto &ProximityURI : PathProximityURIs)
- ParentURIs.insert(ProximityURI);
- }
- // Use SymbolRelevanceSignals for symbol relevance evaluation: use defaults
- // for all parameters except for Proximity Path distance signal.
- SymbolRelevanceSignals PathProximitySignals;
- // DistanceCalculator will find the shortest distance from ProximityPaths to
- // any URI extracted from the ProximityPaths.
- URIDistance DistanceCalculator(Sources);
- PathProximitySignals.FileProximityMatch = &DistanceCalculator;
- // Try to build BOOST iterator for each Proximity Path provided by
- // ProximityPaths. Boosting factor should depend on the distance to the
- // Proximity Path: the closer processed path is, the higher boosting factor.
- for (const auto &ParentURI : ParentURIs.keys()) {
- const auto It =
- InvertedIndex.find(Token(Token::Kind::ProximityURI, ParentURI));
- if (It != InvertedIndex.end()) {
- // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator.
- PathProximitySignals.SymbolURI = ParentURI;
- BoostingIterators.push_back(
- createBoost(create(It->second), PathProximitySignals.evaluate()));
- }
- }
- return BoostingIterators;
-}
-
-} // namespace
-
-void DexIndex::buildIndex() {
- std::vector<std::pair<float, const Symbol *>> ScoredSymbols(Symbols.size());
-
- for (size_t I = 0; I < Symbols.size(); ++I) {
- const Symbol *Sym = Symbols[I];
- LookupTable[Sym->ID] = Sym;
- ScoredSymbols[I] = {quality(*Sym), Sym};
- }
-
- // Symbols are sorted by symbol qualities so that items in the posting lists
- // are stored in the descending order of symbol quality.
- std::sort(begin(ScoredSymbols), end(ScoredSymbols),
- std::greater<std::pair<float, const Symbol *>>());
-
- // SymbolQuality was empty up until now.
- SymbolQuality.resize(Symbols.size());
- // Populate internal storage using Symbol + Score pairs.
- for (size_t I = 0; I < ScoredSymbols.size(); ++I) {
- SymbolQuality[I] = ScoredSymbols[I].first;
- Symbols[I] = ScoredSymbols[I].second;
- }
-
- // Populate TempInvertedIndex with posting lists for index symbols.
- for (DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank) {
- const auto *Sym = Symbols[SymbolRank];
- for (const auto &Token : generateSearchTokens(*Sym))
- InvertedIndex[Token].push_back(SymbolRank);
- }
-
- vlog("Built DexIndex with estimated memory usage {0} bytes.",
- estimateMemoryUsage());
-}
-
-/// Constructs iterators over tokens extracted from the query and exhausts it
-/// while applying Callback to each symbol in the order of decreasing quality
-/// of the matched symbols.
-bool DexIndex::fuzzyFind(
- const FuzzyFindRequest &Req,
- llvm::function_ref<void(const Symbol &)> Callback) const {
- assert(!StringRef(Req.Query).contains("::") &&
- "There must be no :: in query.");
- FuzzyMatcher Filter(Req.Query);
- bool More = false;
-
- std::vector<std::unique_ptr<Iterator>> TopLevelChildren;
- const auto TrigramTokens = generateIdentifierTrigrams(Req.Query);
-
- // Generate query trigrams and construct AND iterator over all query
- // trigrams.
- std::vector<std::unique_ptr<Iterator>> TrigramIterators;
- for (const auto &Trigram : TrigramTokens) {
- const auto It = InvertedIndex.find(Trigram);
- if (It != InvertedIndex.end())
- TrigramIterators.push_back(create(It->second));
- }
- if (!TrigramIterators.empty())
- TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
-
- // Generate scope tokens for search query.
- std::vector<std::unique_ptr<Iterator>> ScopeIterators;
- for (const auto &Scope : Req.Scopes) {
- const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));
- if (It != InvertedIndex.end())
- ScopeIterators.push_back(create(It->second));
- }
- // Add OR iterator for scopes if there are any Scope Iterators.
- if (!ScopeIterators.empty())
- TopLevelChildren.push_back(createOr(move(ScopeIterators)));
-
- // Add proximity paths boosting.
- auto BoostingIterators = createFileProximityIterators(
- Req.ProximityPaths, URISchemes, InvertedIndex);
- // Boosting iterators do not actually filter symbols. In order to preserve
- // the validity of resulting query, TRUE iterator should be added along
- // BOOSTs.
- if (!BoostingIterators.empty()) {
- BoostingIterators.push_back(createTrue(Symbols.size()));
- TopLevelChildren.push_back(createOr(move(BoostingIterators)));
- }
-
- // Use TRUE iterator if both trigrams and scopes from the query are not
- // present in the symbol index.
- auto QueryIterator = TopLevelChildren.empty()
- ? createTrue(Symbols.size())
- : createAnd(move(TopLevelChildren));
- // Retrieve more items than it was requested: some of the items with high
- // final score might not be retrieved otherwise.
- // FIXME(kbobyrev): Pre-scoring retrieval threshold should be adjusted as
- // using 100x of the requested number might not be good in practice, e.g.
- // when the requested number of items is small.
- const size_t ItemsToRetrieve = 100 * Req.MaxCandidateCount;
- auto Root = createLimit(move(QueryIterator), ItemsToRetrieve);
-
- using IDAndScore = std::pair<DocID, float>;
- std::vector<IDAndScore> IDAndScores = consume(*Root);
-
- auto Compare = [](const IDAndScore &LHS, const IDAndScore &RHS) {
- return LHS.second > RHS.second;
- };
- TopN<IDAndScore, decltype(Compare)> Top(Req.MaxCandidateCount, Compare);
- for (const auto &IDAndScore : IDAndScores) {
- const DocID SymbolDocID = IDAndScore.first;
- const auto *Sym = Symbols[SymbolDocID];
- const llvm::Optional<float> Score = Filter.match(Sym->Name);
- if (!Score)
- continue;
- // Combine Fuzzy Matching score, precomputed symbol quality and boosting
- // score for a cumulative final symbol score.
- const float FinalScore =
- (*Score) * SymbolQuality[SymbolDocID] * IDAndScore.second;
- // If Top.push(...) returns true, it means that it had to pop an item. In
- // this case, it is possible to retrieve more symbols.
- if (Top.push({SymbolDocID, FinalScore}))
- More = true;
- }
-
- // Apply callback to the top Req.MaxCandidateCount items in the descending
- // order of cumulative score.
- for (const auto &Item : std::move(Top).items())
- Callback(*Symbols[Item.first]);
- return More;
-}
-
-void DexIndex::lookup(const LookupRequest &Req,
- llvm::function_ref<void(const Symbol &)> Callback) const {
- for (const auto &ID : Req.IDs) {
- auto I = LookupTable.find(ID);
- if (I != LookupTable.end())
- Callback(*I->second);
- }
-}
-
-void DexIndex::refs(const RefsRequest &Req,
- llvm::function_ref<void(const Ref &)> Callback) const {
- log("refs is not implemented.");
-}
-
-size_t DexIndex::estimateMemoryUsage() const {
- size_t Bytes =
- LookupTable.size() * sizeof(std::pair<SymbolID, const Symbol *>);
- Bytes += SymbolQuality.size() * sizeof(std::pair<const Symbol *, float>);
- Bytes += InvertedIndex.size() * sizeof(Token);
-
- for (const auto &P : InvertedIndex) {
- Bytes += P.second.size() * sizeof(DocID);
- }
- return Bytes;
-}
-
-std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath) {
- std::vector<std::string> Result;
- auto ParsedURI = URI::parse(URIPath);
- assert(ParsedURI &&
- "Non-empty argument of generateProximityURIs() should be a valid "
- "URI.");
- StringRef Body = ParsedURI->body();
- // FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum
- // size of resulting vector. Some projects might want to have higher limit if
- // the file hierarchy is deeper. For the generic case, it would be useful to
- // calculate Limit in the index build stage by calculating the maximum depth
- // of the project source tree at runtime.
- size_t Limit = 5;
- // Insert original URI before the loop: this would save a redundant iteration
- // with a URI parse.
- Result.emplace_back(ParsedURI->toString());
- while (!Body.empty() && --Limit > 0) {
- // FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and
- // could be optimized.
- Body = llvm::sys::path::parent_path(Body, llvm::sys::path::Style::posix);
- URI TokenURI(ParsedURI->scheme(), ParsedURI->authority(), Body);
- if (!Body.empty())
- Result.emplace_back(TokenURI.toString());
- }
- return Result;
-}
-
-} // namespace dex
-} // namespace clangd
-} // namespace clang
Removed: clang-tools-extra/trunk/clangd/index/dex/DexIndex.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/index/dex/DexIndex.h?rev=341783&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/index/dex/DexIndex.h (original)
+++ clang-tools-extra/trunk/clangd/index/dex/DexIndex.h (removed)
@@ -1,113 +0,0 @@
-//===--- DexIndex.h - Dex Symbol Index Implementation -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This defines Dex - a symbol index implementation based on query iterators
-// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
-// While consuming more memory and having longer build stage due to
-// preprocessing, Dex will have substantially lower latency. It will also allow
-// efficient symbol searching which is crucial for operations like code
-// completion, and can be very important for a number of different code
-// transformations which will be eventually supported by Clangd.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
-
-#include "Iterator.h"
-#include "Token.h"
-#include "Trigram.h"
-#include "index/Index.h"
-#include "index/MemIndex.h"
-#include "index/SymbolCollector.h"
-
-namespace clang {
-namespace clangd {
-namespace dex {
-
-/// In-memory Dex trigram-based index implementation.
-// FIXME(kbobyrev): Introduce serialization and deserialization of the symbol
-// index so that it can be loaded from the disk. Since static index is not
-// changed frequently, it's safe to assume that it has to be built only once
-// (when the clangd process starts). Therefore, it can be easier to store built
-// index on disk and then load it if available.
-class DexIndex : public SymbolIndex {
-public:
- // All symbols must outlive this index.
- template <typename Range>
- DexIndex(Range &&Symbols, llvm::ArrayRef<std::string> Schemes)
- : URISchemes(Schemes) {
- // If Schemes don't contain any items, fall back to SymbolCollector's
- // default URI schemes.
- if (URISchemes.empty()) {
- SymbolCollector::Options Opts;
- URISchemes = Opts.URISchemes;
- }
- for (auto &&Sym : Symbols)
- this->Symbols.push_back(&Sym);
- buildIndex();
- }
- // Symbols are owned by BackingData, Index takes ownership.
- template <typename Range, typename Payload>
- DexIndex(Range &&Symbols, Payload &&BackingData,
- llvm::ArrayRef<std::string> URISchemes)
- : DexIndex(std::forward<Range>(Symbols), URISchemes) {
- KeepAlive = std::shared_ptr<void>(
- std::make_shared<Payload>(std::move(BackingData)), nullptr);
- }
-
- /// Builds an index from a slab. The index takes ownership of the slab.
- static std::unique_ptr<SymbolIndex>
- build(SymbolSlab Slab, llvm::ArrayRef<std::string> URISchemes) {
- return llvm::make_unique<DexIndex>(Slab, std::move(Slab), URISchemes);
- }
-
- bool
- fuzzyFind(const FuzzyFindRequest &Req,
- llvm::function_ref<void(const Symbol &)> Callback) const override;
-
- void lookup(const LookupRequest &Req,
- llvm::function_ref<void(const Symbol &)> Callback) const override;
-
- void refs(const RefsRequest &Req,
- llvm::function_ref<void(const Ref &)> Callback) const override;
-
- size_t estimateMemoryUsage() const override;
-
-private:
- void buildIndex();
-
- /// Stores symbols sorted in the descending order of symbol quality..
- std::vector<const Symbol *> Symbols;
- /// SymbolQuality[I] is the quality of Symbols[I].
- std::vector<float> SymbolQuality;
- llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
- /// Inverted index is a mapping from the search token to the posting list,
- /// which contains all items which can be characterized by such search token.
- /// For example, if the search token is scope "std::", the corresponding
- /// posting list would contain all indices of symbols defined in namespace
- /// std. Inverted index is used to retrieve posting lists which are processed
- /// during the fuzzyFind process.
- llvm::DenseMap<Token, PostingList> InvertedIndex;
- std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
-
- std::vector<std::string> URISchemes;
-};
-
-/// Returns Search Token for a number of parent directories of given Path.
-/// Should be used within the index build process.
-///
-/// This function is exposed for testing only.
-std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
-
-} // namespace dex
-} // namespace clangd
-} // namespace clang
-
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEXINDEX_H
Modified: clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp?rev=341784&r1=341783&r2=341784&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp (original)
+++ clang-tools-extra/trunk/clangd/tool/ClangdMain.cpp Mon Sep 10 01:23:53 2018
@@ -13,7 +13,6 @@
#include "RIFF.h"
#include "Trace.h"
#include "index/SymbolYAML.h"
-#include "index/dex/DexIndex.h"
#include "clang/Basic/Version.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
Modified: clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt?rev=341784&r1=341783&r2=341784&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt Mon Sep 10 01:23:53 2018
@@ -16,7 +16,7 @@ add_extra_unittest(ClangdTests
CodeCompleteTests.cpp
CodeCompletionStringsTests.cpp
ContextTests.cpp
- DexIndexTests.cpp
+ DexTests.cpp
DraftStoreTests.cpp
FileDistanceTests.cpp
FileIndexTests.cpp
Removed: clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp?rev=341783&view=auto
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/DexIndexTests.cpp (removed)
@@ -1,615 +0,0 @@
-//===-- DexIndexTests.cpp ----------------------------*- C++ -*-----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "FuzzyMatch.h"
-#include "TestFS.h"
-#include "TestIndex.h"
-#include "index/Index.h"
-#include "index/Merge.h"
-#include "index/dex/DexIndex.h"
-#include "index/dex/Iterator.h"
-#include "index/dex/Token.h"
-#include "index/dex/Trigram.h"
-#include "llvm/Support/ScopedPrinter.h"
-#include "llvm/Support/raw_ostream.h"
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
-#include <string>
-#include <vector>
-
-using ::testing::ElementsAre;
-using ::testing::UnorderedElementsAre;
-using namespace llvm;
-
-namespace clang {
-namespace clangd {
-namespace dex {
-namespace {
-
-std::vector<std::string> URISchemes = {"unittest"};
-
-//===----------------------------------------------------------------------===//
-// Query iterator tests.
-//===----------------------------------------------------------------------===//
-
-std::vector<DocID> consumeIDs(Iterator &It) {
- auto IDAndScore = consume(It);
- std::vector<DocID> IDs(IDAndScore.size());
- for (size_t I = 0; I < IDAndScore.size(); ++I)
- IDs[I] = IDAndScore[I].first;
- return IDs;
-}
-
-TEST(DexIndexIterators, DocumentIterator) {
- const PostingList L = {4, 7, 8, 20, 42, 100};
- auto DocIterator = create(L);
-
- EXPECT_EQ(DocIterator->peek(), 4U);
- EXPECT_FALSE(DocIterator->reachedEnd());
-
- DocIterator->advance();
- EXPECT_EQ(DocIterator->peek(), 7U);
- EXPECT_FALSE(DocIterator->reachedEnd());
-
- DocIterator->advanceTo(20);
- EXPECT_EQ(DocIterator->peek(), 20U);
- EXPECT_FALSE(DocIterator->reachedEnd());
-
- DocIterator->advanceTo(65);
- EXPECT_EQ(DocIterator->peek(), 100U);
- EXPECT_FALSE(DocIterator->reachedEnd());
-
- DocIterator->advanceTo(420);
- EXPECT_TRUE(DocIterator->reachedEnd());
-}
-
-TEST(DexIndexIterators, AndWithEmpty) {
- const PostingList L0;
- const PostingList L1 = {0, 5, 7, 10, 42, 320, 9000};
-
- auto AndEmpty = createAnd(create(L0));
- EXPECT_TRUE(AndEmpty->reachedEnd());
-
- auto AndWithEmpty = createAnd(create(L0), create(L1));
- EXPECT_TRUE(AndWithEmpty->reachedEnd());
-
- EXPECT_THAT(consumeIDs(*AndWithEmpty), ElementsAre());
-}
-
-TEST(DexIndexIterators, AndTwoLists) {
- const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
- const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
-
- auto And = createAnd(create(L1), create(L0));
-
- EXPECT_FALSE(And->reachedEnd());
- EXPECT_THAT(consumeIDs(*And), ElementsAre(0U, 7U, 10U, 320U, 9000U));
-
- And = createAnd(create(L0), create(L1));
-
- And->advanceTo(0);
- EXPECT_EQ(And->peek(), 0U);
- And->advanceTo(5);
- EXPECT_EQ(And->peek(), 7U);
- And->advanceTo(10);
- EXPECT_EQ(And->peek(), 10U);
- And->advanceTo(42);
- EXPECT_EQ(And->peek(), 320U);
- And->advanceTo(8999);
- EXPECT_EQ(And->peek(), 9000U);
- And->advanceTo(9001);
-}
-
-TEST(DexIndexIterators, AndThreeLists) {
- const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
- const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
- const PostingList L2 = {1, 4, 7, 11, 30, 60, 320, 9000};
-
- auto And = createAnd(create(L0), create(L1), create(L2));
- EXPECT_EQ(And->peek(), 7U);
- And->advanceTo(300);
- EXPECT_EQ(And->peek(), 320U);
- And->advanceTo(100000);
-
- EXPECT_TRUE(And->reachedEnd());
-}
-
-TEST(DexIndexIterators, OrWithEmpty) {
- const PostingList L0;
- const PostingList L1 = {0, 5, 7, 10, 42, 320, 9000};
-
- auto OrEmpty = createOr(create(L0));
- EXPECT_TRUE(OrEmpty->reachedEnd());
-
- auto OrWithEmpty = createOr(create(L0), create(L1));
- EXPECT_FALSE(OrWithEmpty->reachedEnd());
-
- EXPECT_THAT(consumeIDs(*OrWithEmpty),
- ElementsAre(0U, 5U, 7U, 10U, 42U, 320U, 9000U));
-}
-
-TEST(DexIndexIterators, OrTwoLists) {
- const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
- const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
-
- auto Or = createOr(create(L0), create(L1));
-
- EXPECT_FALSE(Or->reachedEnd());
- EXPECT_EQ(Or->peek(), 0U);
- Or->advance();
- EXPECT_EQ(Or->peek(), 4U);
- Or->advance();
- EXPECT_EQ(Or->peek(), 5U);
- Or->advance();
- EXPECT_EQ(Or->peek(), 7U);
- Or->advance();
- EXPECT_EQ(Or->peek(), 10U);
- Or->advance();
- EXPECT_EQ(Or->peek(), 30U);
- Or->advanceTo(42);
- EXPECT_EQ(Or->peek(), 42U);
- Or->advanceTo(300);
- EXPECT_EQ(Or->peek(), 320U);
- Or->advanceTo(9000);
- EXPECT_EQ(Or->peek(), 9000U);
- Or->advanceTo(9001);
- EXPECT_TRUE(Or->reachedEnd());
-
- Or = createOr(create(L0), create(L1));
-
- EXPECT_THAT(consumeIDs(*Or),
- ElementsAre(0U, 4U, 5U, 7U, 10U, 30U, 42U, 60U, 320U, 9000U));
-}
-
-TEST(DexIndexIterators, OrThreeLists) {
- const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
- const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
- const PostingList L2 = {1, 4, 7, 11, 30, 60, 320, 9000};
-
- auto Or = createOr(create(L0), create(L1), create(L2));
-
- EXPECT_FALSE(Or->reachedEnd());
- EXPECT_EQ(Or->peek(), 0U);
-
- Or->advance();
- EXPECT_EQ(Or->peek(), 1U);
-
- Or->advance();
- EXPECT_EQ(Or->peek(), 4U);
-
- Or->advanceTo(7);
-
- Or->advanceTo(59);
- EXPECT_EQ(Or->peek(), 60U);
-
- Or->advanceTo(9001);
- EXPECT_TRUE(Or->reachedEnd());
-}
-
-// FIXME(kbobyrev): The testcase below is similar to what is expected in real
-// queries. It should be updated once new iterators (such as boosting, limiting,
-// etc iterators) appear. However, it is not exhaustive and it would be
-// beneficial to implement automatic generation (e.g. fuzzing) of query trees
-// for more comprehensive testing.
-TEST(DexIndexIterators, QueryTree) {
- //
- // +-----------------+
- // |And Iterator:1, 5|
- // +--------+--------+
- // |
- // |
- // +-------------+----------------------+
- // | |
- // | |
- // +----------v----------+ +----------v------------+
- // |And Iterator: 1, 5, 9| |Or Iterator: 0, 1, 3, 5|
- // +----------+----------+ +----------+------------+
- // | |
- // +------+-----+ +---------------------+
- // | | | | |
- // +-------v-----+ +----+---+ +--v--+ +---v----+ +----v---+
- // |1, 3, 5, 8, 9| |Boost: 2| |Empty| |Boost: 3| |Boost: 4|
- // +-------------+ +----+---+ +-----+ +---+----+ +----+---+
- // | | |
- // +----v-----+ +-v--+ +---v---+
- // |1, 5, 7, 9| |1, 5| |0, 3, 5|
- // +----------+ +----+ +-------+
- //
- const PostingList L0 = {1, 3, 5, 8, 9};
- const PostingList L1 = {1, 5, 7, 9};
- const PostingList L3;
- const PostingList L4 = {1, 5};
- const PostingList L5 = {0, 3, 5};
-
- // Root of the query tree: [1, 5]
- auto Root = createAnd(
- // Lower And Iterator: [1, 5, 9]
- createAnd(create(L0), createBoost(create(L1), 2U)),
- // Lower Or Iterator: [0, 1, 5]
- createOr(create(L3), createBoost(create(L4), 3U),
- createBoost(create(L5), 4U)));
-
- EXPECT_FALSE(Root->reachedEnd());
- EXPECT_EQ(Root->peek(), 1U);
- Root->advanceTo(0);
- // Advance multiple times. Shouldn't do anything.
- Root->advanceTo(1);
- Root->advanceTo(0);
- EXPECT_EQ(Root->peek(), 1U);
- auto ElementBoost = Root->consume();
- EXPECT_THAT(ElementBoost, 6);
- Root->advance();
- EXPECT_EQ(Root->peek(), 5U);
- Root->advanceTo(5);
- EXPECT_EQ(Root->peek(), 5U);
- ElementBoost = Root->consume();
- EXPECT_THAT(ElementBoost, 8);
- Root->advanceTo(9000);
- EXPECT_TRUE(Root->reachedEnd());
-}
-
-TEST(DexIndexIterators, StringRepresentation) {
- const PostingList L0 = {4, 7, 8, 20, 42, 100};
- const PostingList L1 = {1, 3, 5, 8, 9};
- const PostingList L2 = {1, 5, 7, 9};
- const PostingList L3 = {0, 5};
- const PostingList L4 = {0, 1, 5};
- const PostingList L5;
-
- EXPECT_EQ(llvm::to_string(*(create(L0))), "[{4}, 7, 8, 20, 42, 100, END]");
-
- auto Nested = createAnd(createAnd(create(L1), create(L2)),
- createOr(create(L3), create(L4), create(L5)));
-
- EXPECT_EQ(llvm::to_string(*Nested),
- "(& (| [0, {5}, END] [0, {1}, 5, END] [{END}]) (& [{1}, 5, 7, 9, "
- "END] [{1}, 3, 5, 8, 9, END]))");
-}
-
-TEST(DexIndexIterators, Limit) {
- const PostingList L0 = {3, 6, 7, 20, 42, 100};
- const PostingList L1 = {1, 3, 5, 6, 7, 30, 100};
- const PostingList L2 = {0, 3, 5, 7, 8, 100};
-
- auto DocIterator = createLimit(create(L0), 42);
- EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7, 20, 42, 100));
-
- DocIterator = createLimit(create(L0), 3);
- EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7));
-
- DocIterator = createLimit(create(L0), 0);
- EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre());
-
- auto AndIterator =
- createAnd(createLimit(createTrue(9000), 343), createLimit(create(L0), 2),
- createLimit(create(L1), 3), createLimit(create(L2), 42));
- EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(3, 7));
-}
-
-TEST(DexIndexIterators, True) {
- auto TrueIterator = createTrue(0U);
- EXPECT_TRUE(TrueIterator->reachedEnd());
- EXPECT_THAT(consumeIDs(*TrueIterator), ElementsAre());
-
- PostingList L0 = {1, 2, 5, 7};
- TrueIterator = createTrue(7U);
- EXPECT_THAT(TrueIterator->peek(), 0);
- auto AndIterator = createAnd(create(L0), move(TrueIterator));
- EXPECT_FALSE(AndIterator->reachedEnd());
- EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(1, 2, 5));
-}
-
-TEST(DexIndexIterators, Boost) {
- auto BoostIterator = createBoost(createTrue(5U), 42U);
- EXPECT_FALSE(BoostIterator->reachedEnd());
- auto ElementBoost = BoostIterator->consume();
- EXPECT_THAT(ElementBoost, 42U);
-
- const PostingList L0 = {2, 4};
- const PostingList L1 = {1, 4};
- auto Root = createOr(createTrue(5U), createBoost(create(L0), 2U),
- createBoost(create(L1), 3U));
-
- ElementBoost = Root->consume();
- EXPECT_THAT(ElementBoost, Iterator::DEFAULT_BOOST_SCORE);
- Root->advance();
- EXPECT_THAT(Root->peek(), 1U);
- ElementBoost = Root->consume();
- EXPECT_THAT(ElementBoost, 3);
-
- Root->advance();
- EXPECT_THAT(Root->peek(), 2U);
- ElementBoost = Root->consume();
- EXPECT_THAT(ElementBoost, 2);
-
- Root->advanceTo(4);
- ElementBoost = Root->consume();
- EXPECT_THAT(ElementBoost, 3);
-}
-
-//===----------------------------------------------------------------------===//
-// Search token tests.
-//===----------------------------------------------------------------------===//
-
-testing::Matcher<std::vector<Token>>
-tokensAre(std::initializer_list<std::string> Strings, Token::Kind Kind) {
- std::vector<Token> Tokens;
- for (const auto &TokenData : Strings) {
- Tokens.push_back(Token(Kind, TokenData));
- }
- return testing::UnorderedElementsAreArray(Tokens);
-}
-
-testing::Matcher<std::vector<Token>>
-trigramsAre(std::initializer_list<std::string> Trigrams) {
- return tokensAre(Trigrams, Token::Kind::Trigram);
-}
-
-TEST(DexIndexTrigrams, IdentifierTrigrams) {
- EXPECT_THAT(generateIdentifierTrigrams("X86"),
- trigramsAre({"x86", "x$$", "x8$"}));
-
- EXPECT_THAT(generateIdentifierTrigrams("nl"), trigramsAre({"nl$", "n$$"}));
-
- EXPECT_THAT(generateIdentifierTrigrams("n"), trigramsAre({"n$$"}));
-
- EXPECT_THAT(generateIdentifierTrigrams("clangd"),
- trigramsAre({"c$$", "cl$", "cla", "lan", "ang", "ngd"}));
-
- EXPECT_THAT(generateIdentifierTrigrams("abc_def"),
- trigramsAre({"a$$", "abc", "abd", "ade", "bcd", "bde", "cde",
- "def", "ab$", "ad$"}));
-
- EXPECT_THAT(generateIdentifierTrigrams("a_b_c_d_e_"),
- trigramsAre({"a$$", "a_$", "a_b", "abc", "abd", "acd", "ace",
- "bcd", "bce", "bde", "cde", "ab$"}));
-
- EXPECT_THAT(generateIdentifierTrigrams("unique_ptr"),
- trigramsAre({"u$$", "uni", "unp", "upt", "niq", "nip", "npt",
- "iqu", "iqp", "ipt", "que", "qup", "qpt", "uep",
- "ept", "ptr", "un$", "up$"}));
-
- EXPECT_THAT(
- generateIdentifierTrigrams("TUDecl"),
- trigramsAre({"t$$", "tud", "tde", "ude", "dec", "ecl", "tu$", "td$"}));
-
- EXPECT_THAT(generateIdentifierTrigrams("IsOK"),
- trigramsAre({"i$$", "iso", "iok", "sok", "is$", "io$"}));
-
- EXPECT_THAT(
- generateIdentifierTrigrams("abc_defGhij__klm"),
- trigramsAre({"a$$", "abc", "abd", "abg", "ade", "adg", "adk", "agh",
- "agk", "bcd", "bcg", "bde", "bdg", "bdk", "bgh", "bgk",
- "cde", "cdg", "cdk", "cgh", "cgk", "def", "deg", "dek",
- "dgh", "dgk", "dkl", "efg", "efk", "egh", "egk", "ekl",
- "fgh", "fgk", "fkl", "ghi", "ghk", "gkl", "hij", "hik",
- "hkl", "ijk", "ikl", "jkl", "klm", "ab$", "ad$"}));
-}
-
-TEST(DexIndexTrigrams, QueryTrigrams) {
- EXPECT_THAT(generateQueryTrigrams("c"), trigramsAre({"c$$"}));
- EXPECT_THAT(generateQueryTrigrams("cl"), trigramsAre({"cl$"}));
- EXPECT_THAT(generateQueryTrigrams("cla"), trigramsAre({"cla"}));
-
- EXPECT_THAT(generateQueryTrigrams("_"), trigramsAre({"_$$"}));
- EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__$"}));
- EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({"___"}));
-
- EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"}));
-
- EXPECT_THAT(generateQueryTrigrams("clangd"),
- trigramsAre({"cla", "lan", "ang", "ngd"}));
-
- EXPECT_THAT(generateQueryTrigrams("abc_def"),
- trigramsAre({"abc", "bcd", "cde", "def"}));
-
- EXPECT_THAT(generateQueryTrigrams("a_b_c_d_e_"),
- trigramsAre({"abc", "bcd", "cde"}));
-
- EXPECT_THAT(generateQueryTrigrams("unique_ptr"),
- trigramsAre({"uni", "niq", "iqu", "que", "uep", "ept", "ptr"}));
-
- EXPECT_THAT(generateQueryTrigrams("TUDecl"),
- trigramsAre({"tud", "ude", "dec", "ecl"}));
-
- EXPECT_THAT(generateQueryTrigrams("IsOK"), trigramsAre({"iso", "sok"}));
-
- EXPECT_THAT(generateQueryTrigrams("abc_defGhij__klm"),
- trigramsAre({"abc", "bcd", "cde", "def", "efg", "fgh", "ghi",
- "hij", "ijk", "jkl", "klm"}));
-}
-
-TEST(DexSearchTokens, SymbolPath) {
- EXPECT_THAT(generateProximityURIs(
- "unittest:///clang-tools-extra/clangd/index/Token.h"),
- ElementsAre("unittest:///clang-tools-extra/clangd/index/Token.h",
- "unittest:///clang-tools-extra/clangd/index",
- "unittest:///clang-tools-extra/clangd",
- "unittest:///clang-tools-extra", "unittest:///"));
-
- EXPECT_THAT(generateProximityURIs("unittest:///a/b/c.h"),
- ElementsAre("unittest:///a/b/c.h", "unittest:///a/b",
- "unittest:///a", "unittest:///"));
-}
-
-//===----------------------------------------------------------------------===//
-// Index tests.
-//===----------------------------------------------------------------------===//
-
-TEST(DexIndex, Lookup) {
- auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
- EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
- EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
- UnorderedElementsAre("ns::abc", "ns::xyz"));
- EXPECT_THAT(lookup(*I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
- UnorderedElementsAre("ns::xyz"));
- EXPECT_THAT(lookup(*I, SymbolID("ns::nonono")), UnorderedElementsAre());
-}
-
-TEST(DexIndex, FuzzyFind) {
- auto Index = DexIndex::build(
- generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
- "other::ABC", "other::A"}),
- URISchemes);
- FuzzyFindRequest Req;
- Req.Query = "ABC";
- Req.Scopes = {"ns::"};
- EXPECT_THAT(match(*Index, Req), UnorderedElementsAre("ns::ABC"));
- Req.Scopes = {"ns::", "ns::nested::"};
- EXPECT_THAT(match(*Index, Req),
- UnorderedElementsAre("ns::ABC", "ns::nested::ABC"));
- Req.Query = "A";
- Req.Scopes = {"other::"};
- EXPECT_THAT(match(*Index, Req),
- UnorderedElementsAre("other::A", "other::ABC"));
- Req.Query = "";
- Req.Scopes = {};
- EXPECT_THAT(match(*Index, Req),
- UnorderedElementsAre("ns::ABC", "ns::BCD", "::ABC",
- "ns::nested::ABC", "other::ABC",
- "other::A"));
-}
-
-TEST(DexIndexTest, FuzzyMatchQ) {
- auto I = DexIndex::build(
- generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
- URISchemes);
- FuzzyFindRequest Req;
- Req.Query = "lol";
- Req.MaxCandidateCount = 2;
- EXPECT_THAT(match(*I, Req),
- UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
-}
-
-// FIXME(kbobyrev): This test is different for DexIndex and MemIndex: while
-// MemIndex manages response deduplication, DexIndex simply returns all matched
-// symbols which means there might be equivalent symbols in the response.
-// Before drop-in replacement of MemIndex with DexIndex happens, FileIndex
-// should handle deduplication instead.
-TEST(DexIndexTest, DexIndexDeduplicate) {
- std::vector<Symbol> Symbols = {symbol("1"), symbol("2"), symbol("3"),
- symbol("2") /* duplicate */};
- FuzzyFindRequest Req;
- Req.Query = "2";
- DexIndex I(Symbols, URISchemes);
- EXPECT_THAT(match(I, Req), ElementsAre("2", "2"));
-}
-
-TEST(DexIndexTest, DexIndexLimitedNumMatches) {
- auto I = DexIndex::build(generateNumSymbols(0, 100), URISchemes);
- FuzzyFindRequest Req;
- Req.Query = "5";
- Req.MaxCandidateCount = 3;
- bool Incomplete;
- auto Matches = match(*I, Req, &Incomplete);
- EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
- EXPECT_TRUE(Incomplete);
-}
-
-TEST(DexIndexTest, FuzzyMatch) {
- auto I = DexIndex::build(
- generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
- URISchemes);
- FuzzyFindRequest Req;
- Req.Query = "lol";
- Req.MaxCandidateCount = 2;
- EXPECT_THAT(match(*I, Req),
- UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
-}
-
-TEST(DexIndexTest, MatchQualifiedNamesWithoutSpecificScope) {
- auto I =
- DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
- FuzzyFindRequest Req;
- Req.Query = "y";
- EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
-}
-
-TEST(DexIndexTest, MatchQualifiedNamesWithGlobalScope) {
- auto I =
- DexIndex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
- FuzzyFindRequest Req;
- Req.Query = "y";
- Req.Scopes = {""};
- EXPECT_THAT(match(*I, Req), UnorderedElementsAre("y3"));
-}
-
-TEST(DexIndexTest, MatchQualifiedNamesWithOneScope) {
- auto I = DexIndex::build(
- generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}), URISchemes);
- FuzzyFindRequest Req;
- Req.Query = "y";
- Req.Scopes = {"a::"};
- EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "a::y2"));
-}
-
-TEST(DexIndexTest, MatchQualifiedNamesWithMultipleScopes) {
- auto I = DexIndex::build(
- generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}), URISchemes);
- FuzzyFindRequest Req;
- Req.Query = "y";
- Req.Scopes = {"a::", "b::"};
- EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "a::y2", "b::y3"));
-}
-
-TEST(DexIndexTest, NoMatchNestedScopes) {
- auto I = DexIndex::build(generateSymbols({"a::y1", "a::b::y2"}), URISchemes);
- FuzzyFindRequest Req;
- Req.Query = "y";
- Req.Scopes = {"a::"};
- EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1"));
-}
-
-TEST(DexIndexTest, IgnoreCases) {
- auto I = DexIndex::build(generateSymbols({"ns::ABC", "ns::abc"}), URISchemes);
- FuzzyFindRequest Req;
- Req.Query = "AB";
- Req.Scopes = {"ns::"};
- EXPECT_THAT(match(*I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
-}
-
-TEST(DexIndexTest, Lookup) {
- auto I = DexIndex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
- EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
- EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
- UnorderedElementsAre("ns::abc", "ns::xyz"));
- EXPECT_THAT(lookup(*I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
- UnorderedElementsAre("ns::xyz"));
- EXPECT_THAT(lookup(*I, SymbolID("ns::nonono")), UnorderedElementsAre());
-}
-
-TEST(DexIndexTest, ProximityPathsBoosting) {
- auto RootSymbol = symbol("root::abc");
- RootSymbol.CanonicalDeclaration.FileURI = "unittest:///file.h";
- auto CloseSymbol = symbol("close::abc");
- CloseSymbol.CanonicalDeclaration.FileURI = "unittest:///a/b/c/d/e/f/file.h";
-
- std::vector<Symbol> Symbols{CloseSymbol, RootSymbol};
- DexIndex I(Symbols, URISchemes);
-
- FuzzyFindRequest Req;
- Req.Query = "abc";
- // The best candidate can change depending on the proximity paths.
- Req.MaxCandidateCount = 1;
-
- // FuzzyFind request comes from the file which is far from the root: expect
- // CloseSymbol to come out.
- Req.ProximityPaths = {testPath("a/b/c/d/e/f/file.h")};
- EXPECT_THAT(match(I, Req), ElementsAre("close::abc"));
-
- // FuzzyFind request comes from the file which is close to the root: expect
- // RootSymbol to come out.
- Req.ProximityPaths = {testPath("file.h")};
- EXPECT_THAT(match(I, Req), ElementsAre("root::abc"));
-}
-
-} // namespace
-} // namespace dex
-} // namespace clangd
-} // namespace clang
Added: clang-tools-extra/trunk/unittests/clangd/DexTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/DexTests.cpp?rev=341784&view=auto
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/DexTests.cpp (added)
+++ clang-tools-extra/trunk/unittests/clangd/DexTests.cpp Mon Sep 10 01:23:53 2018
@@ -0,0 +1,615 @@
+//===-- DexTests.cpp ---------------------------------*- C++ -*-----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "FuzzyMatch.h"
+#include "TestFS.h"
+#include "TestIndex.h"
+#include "index/Index.h"
+#include "index/Merge.h"
+#include "index/dex/Dex.h"
+#include "index/dex/Iterator.h"
+#include "index/dex/Token.h"
+#include "index/dex/Trigram.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <string>
+#include <vector>
+
+using ::testing::ElementsAre;
+using ::testing::UnorderedElementsAre;
+using namespace llvm;
+
+namespace clang {
+namespace clangd {
+namespace dex {
+namespace {
+
+std::vector<std::string> URISchemes = {"unittest"};
+
+//===----------------------------------------------------------------------===//
+// Query iterator tests.
+//===----------------------------------------------------------------------===//
+
+std::vector<DocID> consumeIDs(Iterator &It) {
+ auto IDAndScore = consume(It);
+ std::vector<DocID> IDs(IDAndScore.size());
+ for (size_t I = 0; I < IDAndScore.size(); ++I)
+ IDs[I] = IDAndScore[I].first;
+ return IDs;
+}
+
+TEST(DexIterators, DocumentIterator) {
+ const PostingList L = {4, 7, 8, 20, 42, 100};
+ auto DocIterator = create(L);
+
+ EXPECT_EQ(DocIterator->peek(), 4U);
+ EXPECT_FALSE(DocIterator->reachedEnd());
+
+ DocIterator->advance();
+ EXPECT_EQ(DocIterator->peek(), 7U);
+ EXPECT_FALSE(DocIterator->reachedEnd());
+
+ DocIterator->advanceTo(20);
+ EXPECT_EQ(DocIterator->peek(), 20U);
+ EXPECT_FALSE(DocIterator->reachedEnd());
+
+ DocIterator->advanceTo(65);
+ EXPECT_EQ(DocIterator->peek(), 100U);
+ EXPECT_FALSE(DocIterator->reachedEnd());
+
+ DocIterator->advanceTo(420);
+ EXPECT_TRUE(DocIterator->reachedEnd());
+}
+
+TEST(DexIterators, AndWithEmpty) {
+ const PostingList L0;
+ const PostingList L1 = {0, 5, 7, 10, 42, 320, 9000};
+
+ auto AndEmpty = createAnd(create(L0));
+ EXPECT_TRUE(AndEmpty->reachedEnd());
+
+ auto AndWithEmpty = createAnd(create(L0), create(L1));
+ EXPECT_TRUE(AndWithEmpty->reachedEnd());
+
+ EXPECT_THAT(consumeIDs(*AndWithEmpty), ElementsAre());
+}
+
+TEST(DexIterators, AndTwoLists) {
+ const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
+ const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
+
+ auto And = createAnd(create(L1), create(L0));
+
+ EXPECT_FALSE(And->reachedEnd());
+ EXPECT_THAT(consumeIDs(*And), ElementsAre(0U, 7U, 10U, 320U, 9000U));
+
+ And = createAnd(create(L0), create(L1));
+
+ And->advanceTo(0);
+ EXPECT_EQ(And->peek(), 0U);
+ And->advanceTo(5);
+ EXPECT_EQ(And->peek(), 7U);
+ And->advanceTo(10);
+ EXPECT_EQ(And->peek(), 10U);
+ And->advanceTo(42);
+ EXPECT_EQ(And->peek(), 320U);
+ And->advanceTo(8999);
+ EXPECT_EQ(And->peek(), 9000U);
+ And->advanceTo(9001);
+}
+
+TEST(DexIterators, AndThreeLists) {
+ const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
+ const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
+ const PostingList L2 = {1, 4, 7, 11, 30, 60, 320, 9000};
+
+ auto And = createAnd(create(L0), create(L1), create(L2));
+ EXPECT_EQ(And->peek(), 7U);
+ And->advanceTo(300);
+ EXPECT_EQ(And->peek(), 320U);
+ And->advanceTo(100000);
+
+ EXPECT_TRUE(And->reachedEnd());
+}
+
+TEST(DexIterators, OrWithEmpty) {
+ const PostingList L0;
+ const PostingList L1 = {0, 5, 7, 10, 42, 320, 9000};
+
+ auto OrEmpty = createOr(create(L0));
+ EXPECT_TRUE(OrEmpty->reachedEnd());
+
+ auto OrWithEmpty = createOr(create(L0), create(L1));
+ EXPECT_FALSE(OrWithEmpty->reachedEnd());
+
+ EXPECT_THAT(consumeIDs(*OrWithEmpty),
+ ElementsAre(0U, 5U, 7U, 10U, 42U, 320U, 9000U));
+}
+
+TEST(DexIterators, OrTwoLists) {
+ const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
+ const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
+
+ auto Or = createOr(create(L0), create(L1));
+
+ EXPECT_FALSE(Or->reachedEnd());
+ EXPECT_EQ(Or->peek(), 0U);
+ Or->advance();
+ EXPECT_EQ(Or->peek(), 4U);
+ Or->advance();
+ EXPECT_EQ(Or->peek(), 5U);
+ Or->advance();
+ EXPECT_EQ(Or->peek(), 7U);
+ Or->advance();
+ EXPECT_EQ(Or->peek(), 10U);
+ Or->advance();
+ EXPECT_EQ(Or->peek(), 30U);
+ Or->advanceTo(42);
+ EXPECT_EQ(Or->peek(), 42U);
+ Or->advanceTo(300);
+ EXPECT_EQ(Or->peek(), 320U);
+ Or->advanceTo(9000);
+ EXPECT_EQ(Or->peek(), 9000U);
+ Or->advanceTo(9001);
+ EXPECT_TRUE(Or->reachedEnd());
+
+ Or = createOr(create(L0), create(L1));
+
+ EXPECT_THAT(consumeIDs(*Or),
+ ElementsAre(0U, 4U, 5U, 7U, 10U, 30U, 42U, 60U, 320U, 9000U));
+}
+
+TEST(DexIterators, OrThreeLists) {
+ const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
+ const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
+ const PostingList L2 = {1, 4, 7, 11, 30, 60, 320, 9000};
+
+ auto Or = createOr(create(L0), create(L1), create(L2));
+
+ EXPECT_FALSE(Or->reachedEnd());
+ EXPECT_EQ(Or->peek(), 0U);
+
+ Or->advance();
+ EXPECT_EQ(Or->peek(), 1U);
+
+ Or->advance();
+ EXPECT_EQ(Or->peek(), 4U);
+
+ Or->advanceTo(7);
+
+ Or->advanceTo(59);
+ EXPECT_EQ(Or->peek(), 60U);
+
+ Or->advanceTo(9001);
+ EXPECT_TRUE(Or->reachedEnd());
+}
+
+// FIXME(kbobyrev): The testcase below is similar to what is expected in real
+// queries. It should be updated once new iterators (such as boosting, limiting,
+// etc iterators) appear. However, it is not exhaustive and it would be
+// beneficial to implement automatic generation (e.g. fuzzing) of query trees
+// for more comprehensive testing.
+TEST(DexIterators, QueryTree) {
+ //
+ // +-----------------+
+ // |And Iterator:1, 5|
+ // +--------+--------+
+ // |
+ // |
+ // +-------------+----------------------+
+ // | |
+ // | |
+ // +----------v----------+ +----------v------------+
+ // |And Iterator: 1, 5, 9| |Or Iterator: 0, 1, 3, 5|
+ // +----------+----------+ +----------+------------+
+ // | |
+ // +------+-----+ +---------------------+
+ // | | | | |
+ // +-------v-----+ +----+---+ +--v--+ +---v----+ +----v---+
+ // |1, 3, 5, 8, 9| |Boost: 2| |Empty| |Boost: 3| |Boost: 4|
+ // +-------------+ +----+---+ +-----+ +---+----+ +----+---+
+ // | | |
+ // +----v-----+ +-v--+ +---v---+
+ // |1, 5, 7, 9| |1, 5| |0, 3, 5|
+ // +----------+ +----+ +-------+
+ //
+ const PostingList L0 = {1, 3, 5, 8, 9};
+ const PostingList L1 = {1, 5, 7, 9};
+ const PostingList L3;
+ const PostingList L4 = {1, 5};
+ const PostingList L5 = {0, 3, 5};
+
+ // Root of the query tree: [1, 5]
+ auto Root = createAnd(
+ // Lower And Iterator: [1, 5, 9]
+ createAnd(create(L0), createBoost(create(L1), 2U)),
+ // Lower Or Iterator: [0, 1, 5]
+ createOr(create(L3), createBoost(create(L4), 3U),
+ createBoost(create(L5), 4U)));
+
+ EXPECT_FALSE(Root->reachedEnd());
+ EXPECT_EQ(Root->peek(), 1U);
+ Root->advanceTo(0);
+ // Advance multiple times. Shouldn't do anything.
+ Root->advanceTo(1);
+ Root->advanceTo(0);
+ EXPECT_EQ(Root->peek(), 1U);
+ auto ElementBoost = Root->consume();
+ EXPECT_THAT(ElementBoost, 6);
+ Root->advance();
+ EXPECT_EQ(Root->peek(), 5U);
+ Root->advanceTo(5);
+ EXPECT_EQ(Root->peek(), 5U);
+ ElementBoost = Root->consume();
+ EXPECT_THAT(ElementBoost, 8);
+ Root->advanceTo(9000);
+ EXPECT_TRUE(Root->reachedEnd());
+}
+
+TEST(DexIterators, StringRepresentation) {
+ const PostingList L0 = {4, 7, 8, 20, 42, 100};
+ const PostingList L1 = {1, 3, 5, 8, 9};
+ const PostingList L2 = {1, 5, 7, 9};
+ const PostingList L3 = {0, 5};
+ const PostingList L4 = {0, 1, 5};
+ const PostingList L5;
+
+ EXPECT_EQ(llvm::to_string(*(create(L0))), "[{4}, 7, 8, 20, 42, 100, END]");
+
+ auto Nested = createAnd(createAnd(create(L1), create(L2)),
+ createOr(create(L3), create(L4), create(L5)));
+
+ EXPECT_EQ(llvm::to_string(*Nested),
+ "(& (| [0, {5}, END] [0, {1}, 5, END] [{END}]) (& [{1}, 5, 7, 9, "
+ "END] [{1}, 3, 5, 8, 9, END]))");
+}
+
+TEST(DexIterators, Limit) {
+ const PostingList L0 = {3, 6, 7, 20, 42, 100};
+ const PostingList L1 = {1, 3, 5, 6, 7, 30, 100};
+ const PostingList L2 = {0, 3, 5, 7, 8, 100};
+
+ auto DocIterator = createLimit(create(L0), 42);
+ EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7, 20, 42, 100));
+
+ DocIterator = createLimit(create(L0), 3);
+ EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7));
+
+ DocIterator = createLimit(create(L0), 0);
+ EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre());
+
+ auto AndIterator =
+ createAnd(createLimit(createTrue(9000), 343), createLimit(create(L0), 2),
+ createLimit(create(L1), 3), createLimit(create(L2), 42));
+ EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(3, 7));
+}
+
+TEST(DexIterators, True) {
+ auto TrueIterator = createTrue(0U);
+ EXPECT_TRUE(TrueIterator->reachedEnd());
+ EXPECT_THAT(consumeIDs(*TrueIterator), ElementsAre());
+
+ PostingList L0 = {1, 2, 5, 7};
+ TrueIterator = createTrue(7U);
+ EXPECT_THAT(TrueIterator->peek(), 0);
+ auto AndIterator = createAnd(create(L0), move(TrueIterator));
+ EXPECT_FALSE(AndIterator->reachedEnd());
+ EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(1, 2, 5));
+}
+
+TEST(DexIterators, Boost) {
+ auto BoostIterator = createBoost(createTrue(5U), 42U);
+ EXPECT_FALSE(BoostIterator->reachedEnd());
+ auto ElementBoost = BoostIterator->consume();
+ EXPECT_THAT(ElementBoost, 42U);
+
+ const PostingList L0 = {2, 4};
+ const PostingList L1 = {1, 4};
+ auto Root = createOr(createTrue(5U), createBoost(create(L0), 2U),
+ createBoost(create(L1), 3U));
+
+ ElementBoost = Root->consume();
+ EXPECT_THAT(ElementBoost, Iterator::DEFAULT_BOOST_SCORE);
+ Root->advance();
+ EXPECT_THAT(Root->peek(), 1U);
+ ElementBoost = Root->consume();
+ EXPECT_THAT(ElementBoost, 3);
+
+ Root->advance();
+ EXPECT_THAT(Root->peek(), 2U);
+ ElementBoost = Root->consume();
+ EXPECT_THAT(ElementBoost, 2);
+
+ Root->advanceTo(4);
+ ElementBoost = Root->consume();
+ EXPECT_THAT(ElementBoost, 3);
+}
+
+//===----------------------------------------------------------------------===//
+// Search token tests.
+//===----------------------------------------------------------------------===//
+
+testing::Matcher<std::vector<Token>>
+tokensAre(std::initializer_list<std::string> Strings, Token::Kind Kind) {
+ std::vector<Token> Tokens;
+ for (const auto &TokenData : Strings) {
+ Tokens.push_back(Token(Kind, TokenData));
+ }
+ return testing::UnorderedElementsAreArray(Tokens);
+}
+
+testing::Matcher<std::vector<Token>>
+trigramsAre(std::initializer_list<std::string> Trigrams) {
+ return tokensAre(Trigrams, Token::Kind::Trigram);
+}
+
+TEST(DexTrigrams, IdentifierTrigrams) {
+ EXPECT_THAT(generateIdentifierTrigrams("X86"),
+ trigramsAre({"x86", "x$$", "x8$"}));
+
+ EXPECT_THAT(generateIdentifierTrigrams("nl"), trigramsAre({"nl$", "n$$"}));
+
+ EXPECT_THAT(generateIdentifierTrigrams("n"), trigramsAre({"n$$"}));
+
+ EXPECT_THAT(generateIdentifierTrigrams("clangd"),
+ trigramsAre({"c$$", "cl$", "cla", "lan", "ang", "ngd"}));
+
+ EXPECT_THAT(generateIdentifierTrigrams("abc_def"),
+ trigramsAre({"a$$", "abc", "abd", "ade", "bcd", "bde", "cde",
+ "def", "ab$", "ad$"}));
+
+ EXPECT_THAT(generateIdentifierTrigrams("a_b_c_d_e_"),
+ trigramsAre({"a$$", "a_$", "a_b", "abc", "abd", "acd", "ace",
+ "bcd", "bce", "bde", "cde", "ab$"}));
+
+ EXPECT_THAT(generateIdentifierTrigrams("unique_ptr"),
+ trigramsAre({"u$$", "uni", "unp", "upt", "niq", "nip", "npt",
+ "iqu", "iqp", "ipt", "que", "qup", "qpt", "uep",
+ "ept", "ptr", "un$", "up$"}));
+
+ EXPECT_THAT(
+ generateIdentifierTrigrams("TUDecl"),
+ trigramsAre({"t$$", "tud", "tde", "ude", "dec", "ecl", "tu$", "td$"}));
+
+ EXPECT_THAT(generateIdentifierTrigrams("IsOK"),
+ trigramsAre({"i$$", "iso", "iok", "sok", "is$", "io$"}));
+
+ EXPECT_THAT(
+ generateIdentifierTrigrams("abc_defGhij__klm"),
+ trigramsAre({"a$$", "abc", "abd", "abg", "ade", "adg", "adk", "agh",
+ "agk", "bcd", "bcg", "bde", "bdg", "bdk", "bgh", "bgk",
+ "cde", "cdg", "cdk", "cgh", "cgk", "def", "deg", "dek",
+ "dgh", "dgk", "dkl", "efg", "efk", "egh", "egk", "ekl",
+ "fgh", "fgk", "fkl", "ghi", "ghk", "gkl", "hij", "hik",
+ "hkl", "ijk", "ikl", "jkl", "klm", "ab$", "ad$"}));
+}
+
+TEST(DexTrigrams, QueryTrigrams) {
+ EXPECT_THAT(generateQueryTrigrams("c"), trigramsAre({"c$$"}));
+ EXPECT_THAT(generateQueryTrigrams("cl"), trigramsAre({"cl$"}));
+ EXPECT_THAT(generateQueryTrigrams("cla"), trigramsAre({"cla"}));
+
+ EXPECT_THAT(generateQueryTrigrams("_"), trigramsAre({"_$$"}));
+ EXPECT_THAT(generateQueryTrigrams("__"), trigramsAre({"__$"}));
+ EXPECT_THAT(generateQueryTrigrams("___"), trigramsAre({"___"}));
+
+ EXPECT_THAT(generateQueryTrigrams("X86"), trigramsAre({"x86"}));
+
+ EXPECT_THAT(generateQueryTrigrams("clangd"),
+ trigramsAre({"cla", "lan", "ang", "ngd"}));
+
+ EXPECT_THAT(generateQueryTrigrams("abc_def"),
+ trigramsAre({"abc", "bcd", "cde", "def"}));
+
+ EXPECT_THAT(generateQueryTrigrams("a_b_c_d_e_"),
+ trigramsAre({"abc", "bcd", "cde"}));
+
+ EXPECT_THAT(generateQueryTrigrams("unique_ptr"),
+ trigramsAre({"uni", "niq", "iqu", "que", "uep", "ept", "ptr"}));
+
+ EXPECT_THAT(generateQueryTrigrams("TUDecl"),
+ trigramsAre({"tud", "ude", "dec", "ecl"}));
+
+ EXPECT_THAT(generateQueryTrigrams("IsOK"), trigramsAre({"iso", "sok"}));
+
+ EXPECT_THAT(generateQueryTrigrams("abc_defGhij__klm"),
+ trigramsAre({"abc", "bcd", "cde", "def", "efg", "fgh", "ghi",
+ "hij", "ijk", "jkl", "klm"}));
+}
+
+TEST(DexSearchTokens, SymbolPath) {
+ EXPECT_THAT(generateProximityURIs(
+ "unittest:///clang-tools-extra/clangd/index/Token.h"),
+ ElementsAre("unittest:///clang-tools-extra/clangd/index/Token.h",
+ "unittest:///clang-tools-extra/clangd/index",
+ "unittest:///clang-tools-extra/clangd",
+ "unittest:///clang-tools-extra", "unittest:///"));
+
+ EXPECT_THAT(generateProximityURIs("unittest:///a/b/c.h"),
+ ElementsAre("unittest:///a/b/c.h", "unittest:///a/b",
+ "unittest:///a", "unittest:///"));
+}
+
+//===----------------------------------------------------------------------===//
+// Index tests.
+//===----------------------------------------------------------------------===//
+
+TEST(Dex, Lookup) {
+ auto I = Dex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
+ EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
+ EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
+ UnorderedElementsAre("ns::abc", "ns::xyz"));
+ EXPECT_THAT(lookup(*I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
+ UnorderedElementsAre("ns::xyz"));
+ EXPECT_THAT(lookup(*I, SymbolID("ns::nonono")), UnorderedElementsAre());
+}
+
+TEST(Dex, FuzzyFind) {
+ auto Index = Dex::build(
+ generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
+ "other::ABC", "other::A"}),
+ URISchemes);
+ FuzzyFindRequest Req;
+ Req.Query = "ABC";
+ Req.Scopes = {"ns::"};
+ EXPECT_THAT(match(*Index, Req), UnorderedElementsAre("ns::ABC"));
+ Req.Scopes = {"ns::", "ns::nested::"};
+ EXPECT_THAT(match(*Index, Req),
+ UnorderedElementsAre("ns::ABC", "ns::nested::ABC"));
+ Req.Query = "A";
+ Req.Scopes = {"other::"};
+ EXPECT_THAT(match(*Index, Req),
+ UnorderedElementsAre("other::A", "other::ABC"));
+ Req.Query = "";
+ Req.Scopes = {};
+ EXPECT_THAT(match(*Index, Req),
+ UnorderedElementsAre("ns::ABC", "ns::BCD", "::ABC",
+ "ns::nested::ABC", "other::ABC",
+ "other::A"));
+}
+
+TEST(DexTest, FuzzyMatchQ) {
+ auto I = Dex::build(
+ generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
+ URISchemes);
+ FuzzyFindRequest Req;
+ Req.Query = "lol";
+ Req.MaxCandidateCount = 2;
+ EXPECT_THAT(match(*I, Req),
+ UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
+}
+
+// FIXME(kbobyrev): This test is different for Dex and MemIndex: while
+// MemIndex manages response deduplication, Dex simply returns all matched
+// symbols which means there might be equivalent symbols in the response.
+// Before drop-in replacement of MemIndex with Dex happens, FileIndex
+// should handle deduplication instead.
+TEST(DexTest, DexDeduplicate) {
+ std::vector<Symbol> Symbols = {symbol("1"), symbol("2"), symbol("3"),
+ symbol("2") /* duplicate */};
+ FuzzyFindRequest Req;
+ Req.Query = "2";
+ Dex I(Symbols, URISchemes);
+ EXPECT_THAT(match(I, Req), ElementsAre("2", "2"));
+}
+
+TEST(DexTest, DexLimitedNumMatches) {
+ auto I = Dex::build(generateNumSymbols(0, 100), URISchemes);
+ FuzzyFindRequest Req;
+ Req.Query = "5";
+ Req.MaxCandidateCount = 3;
+ bool Incomplete;
+ auto Matches = match(*I, Req, &Incomplete);
+ EXPECT_EQ(Matches.size(), Req.MaxCandidateCount);
+ EXPECT_TRUE(Incomplete);
+}
+
+TEST(DexTest, FuzzyMatch) {
+ auto I = Dex::build(
+ generateSymbols({"LaughingOutLoud", "LionPopulation", "LittleOldLady"}),
+ URISchemes);
+ FuzzyFindRequest Req;
+ Req.Query = "lol";
+ Req.MaxCandidateCount = 2;
+ EXPECT_THAT(match(*I, Req),
+ UnorderedElementsAre("LaughingOutLoud", "LittleOldLady"));
+}
+
+TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) {
+ auto I =
+ Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
+ FuzzyFindRequest Req;
+ Req.Query = "y";
+ EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
+}
+
+TEST(DexTest, MatchQualifiedNamesWithGlobalScope) {
+ auto I =
+ Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
+ FuzzyFindRequest Req;
+ Req.Query = "y";
+ Req.Scopes = {""};
+ EXPECT_THAT(match(*I, Req), UnorderedElementsAre("y3"));
+}
+
+TEST(DexTest, MatchQualifiedNamesWithOneScope) {
+ auto I = Dex::build(
+ generateSymbols({"a::y1", "a::y2", "a::x", "b::y2", "y3"}), URISchemes);
+ FuzzyFindRequest Req;
+ Req.Query = "y";
+ Req.Scopes = {"a::"};
+ EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "a::y2"));
+}
+
+TEST(DexTest, MatchQualifiedNamesWithMultipleScopes) {
+ auto I = Dex::build(
+ generateSymbols({"a::y1", "a::y2", "a::x", "b::y3", "y3"}), URISchemes);
+ FuzzyFindRequest Req;
+ Req.Query = "y";
+ Req.Scopes = {"a::", "b::"};
+ EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "a::y2", "b::y3"));
+}
+
+TEST(DexTest, NoMatchNestedScopes) {
+ auto I = Dex::build(generateSymbols({"a::y1", "a::b::y2"}), URISchemes);
+ FuzzyFindRequest Req;
+ Req.Query = "y";
+ Req.Scopes = {"a::"};
+ EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1"));
+}
+
+TEST(DexTest, IgnoreCases) {
+ auto I = Dex::build(generateSymbols({"ns::ABC", "ns::abc"}), URISchemes);
+ FuzzyFindRequest Req;
+ Req.Query = "AB";
+ Req.Scopes = {"ns::"};
+ EXPECT_THAT(match(*I, Req), UnorderedElementsAre("ns::ABC", "ns::abc"));
+}
+
+TEST(DexTest, Lookup) {
+ auto I = Dex::build(generateSymbols({"ns::abc", "ns::xyz"}), URISchemes);
+ EXPECT_THAT(lookup(*I, SymbolID("ns::abc")), UnorderedElementsAre("ns::abc"));
+ EXPECT_THAT(lookup(*I, {SymbolID("ns::abc"), SymbolID("ns::xyz")}),
+ UnorderedElementsAre("ns::abc", "ns::xyz"));
+ EXPECT_THAT(lookup(*I, {SymbolID("ns::nonono"), SymbolID("ns::xyz")}),
+ UnorderedElementsAre("ns::xyz"));
+ EXPECT_THAT(lookup(*I, SymbolID("ns::nonono")), UnorderedElementsAre());
+}
+
+TEST(DexTest, ProximityPathsBoosting) {
+ auto RootSymbol = symbol("root::abc");
+ RootSymbol.CanonicalDeclaration.FileURI = "unittest:///file.h";
+ auto CloseSymbol = symbol("close::abc");
+ CloseSymbol.CanonicalDeclaration.FileURI = "unittest:///a/b/c/d/e/f/file.h";
+
+ std::vector<Symbol> Symbols{CloseSymbol, RootSymbol};
+ Dex I(Symbols, URISchemes);
+
+ FuzzyFindRequest Req;
+ Req.Query = "abc";
+ // The best candidate can change depending on the proximity paths.
+ Req.MaxCandidateCount = 1;
+
+ // FuzzyFind request comes from the file which is far from the root: expect
+ // CloseSymbol to come out.
+ Req.ProximityPaths = {testPath("a/b/c/d/e/f/file.h")};
+ EXPECT_THAT(match(I, Req), ElementsAre("close::abc"));
+
+ // FuzzyFind request comes from the file which is close to the root: expect
+ // RootSymbol to come out.
+ Req.ProximityPaths = {testPath("file.h")};
+ EXPECT_THAT(match(I, Req), ElementsAre("root::abc"));
+}
+
+} // namespace
+} // namespace dex
+} // namespace clangd
+} // namespace clang
Modified: clang-tools-extra/trunk/unittests/clangd/TestIndex.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/TestIndex.h?rev=341784&r1=341783&r2=341784&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/TestIndex.h (original)
+++ clang-tools-extra/trunk/unittests/clangd/TestIndex.h Mon Sep 10 01:23:53 2018
@@ -12,10 +12,6 @@
#include "index/Index.h"
#include "index/Merge.h"
-#include "index/dex/DexIndex.h"
-#include "index/dex/Iterator.h"
-#include "index/dex/Token.h"
-#include "index/dex/Trigram.h"
namespace clang {
namespace clangd {
More information about the cfe-commits
mailing list