[clang-tools-extra] r336177 - [clangd] Incorporate transitive #includes into code complete proximity scoring.

via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 3 11:00:26 PDT 2018


Hi Sam,

This commit is causing failures on windows buildbots.

  Testing Time: 211.19s
  ********************
  Failing Tests (1):
      Extra Tools Unit Tests :: clangd/Checking/./ClangdTests.exe/FileDistanceTests.URI


Example build: http://lab.llvm.org:8011/builders/llvm-clang-lld-x86_64-scei-ps4-windows10pro-fast/builds/18166

These failures are also occurring on our internal bots. Could you take a look at this?

Thanks,
Matthew

-----Original Message-----
From: cfe-commits [mailto:cfe-commits-bounces at lists.llvm.org] On Behalf Of Sam McCall via cfe-commits
Sent: Tuesday, July 3, 2018 1:09 AM
To: cfe-commits at lists.llvm.org
Subject: [clang-tools-extra] r336177 - [clangd] Incorporate transitive #includes into code complete proximity scoring.

Author: sammccall
Date: Tue Jul  3 01:09:29 2018
New Revision: 336177

URL: http://llvm.org/viewvc/llvm-project?rev=336177&view=rev
Log:
[clangd] Incorporate transitive #includes into code complete proximity scoring.

Summary:
We now compute a distance from the main file to the symbol header, which
is a weighted count of:
 - some number of #include traversals from source file --> included file
 - some number of FS traversals from file --> parent directory
 - some number of FS traversals from parent directory --> child file/dir
This calculation is performed in the appropriate URI scheme.

This means we'll get some proximity boost from header files in main-file
contexts, even when these are in different directory trees.

This extended file proximity model is not yet incorporated in the index
interface/implementation.

Reviewers: ioeric

Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, cfe-commits

Differential Revision: https://reviews.llvm.org/D48441

Added:
    clang-tools-extra/trunk/clangd/FileDistance.cpp
    clang-tools-extra/trunk/clangd/FileDistance.h
    clang-tools-extra/trunk/unittests/clangd/FileDistanceTests.cpp
Modified:
    clang-tools-extra/trunk/clangd/CMakeLists.txt
    clang-tools-extra/trunk/clangd/ClangdServer.cpp
    clang-tools-extra/trunk/clangd/ClangdUnit.cpp
    clang-tools-extra/trunk/clangd/ClangdUnit.h
    clang-tools-extra/trunk/clangd/CodeComplete.cpp
    clang-tools-extra/trunk/clangd/CodeComplete.h
    clang-tools-extra/trunk/clangd/Headers.cpp
    clang-tools-extra/trunk/clangd/Headers.h
    clang-tools-extra/trunk/clangd/Quality.cpp
    clang-tools-extra/trunk/clangd/Quality.h
    clang-tools-extra/trunk/clangd/XRefs.cpp
    clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
    clang-tools-extra/trunk/unittests/clangd/HeadersTests.cpp
    clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp

Modified: clang-tools-extra/trunk/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CMakeLists.txt?rev=336177&r1=336176&r2=336177&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/clangd/CMakeLists.txt Tue Jul  3 01:09:29 2018
@@ -19,6 +19,7 @@ add_clang_library(clangDaemon
   Diagnostics.cpp
   DraftStore.cpp
   FindSymbols.cpp
+  FileDistance.cpp
   FuzzyMatch.cpp
   GlobalCompilationDatabase.cpp
   Headers.cpp

Modified: clang-tools-extra/trunk/clangd/ClangdServer.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/ClangdServer.cpp?rev=336177&r1=336176&r2=336177&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/ClangdServer.cpp (original)
+++ clang-tools-extra/trunk/clangd/ClangdServer.cpp Tue Jul  3 01:09:29 2018
@@ -167,7 +167,7 @@ void ClangdServer::codeComplete(PathRef
     // both the old and the new version in case only one of them matches.
     CodeCompleteResult Result = clangd::codeComplete(
         File, IP->Command, PreambleData ? &PreambleData->Preamble : nullptr,
-        PreambleData ? PreambleData->Inclusions : std::vector<Inclusion>(),
+        PreambleData ? PreambleData->Includes : IncludeStructure(),
         IP->Contents, Pos, FS, PCHs, CodeCompleteOpts);
     CB(std::move(Result));
   };

Modified: clang-tools-extra/trunk/clangd/ClangdUnit.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/ClangdUnit.cpp?rev=336177&r1=336176&r2=336177&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/ClangdUnit.cpp (original)
+++ clang-tools-extra/trunk/clangd/ClangdUnit.cpp Tue Jul  3 01:09:29 2018
@@ -88,7 +88,7 @@ public:
   CppFilePreambleCallbacks(PathRef File, PreambleParsedCallback ParsedCallback)
       : File(File), ParsedCallback(ParsedCallback) {}
 
-  std::vector<Inclusion> takeInclusions() { return std::move(Inclusions); }
+  IncludeStructure takeIncludes() { return std::move(Includes); }
 
   void AfterExecute(CompilerInstance &CI) override {
     if (!ParsedCallback)
@@ -103,15 +103,13 @@ public:
 
   std::unique_ptr<PPCallbacks> createPPCallbacks() override {
     assert(SourceMgr && "SourceMgr must be set at this point");
-    return collectInclusionsInMainFileCallback(
-        *SourceMgr,
-        [this](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); });
+    return collectIncludeStructureCallback(*SourceMgr, &Includes);
   }
 
 private:
   PathRef File;
   PreambleParsedCallback ParsedCallback;
-  std::vector<Inclusion> Inclusions;
+  IncludeStructure Includes;
   SourceManager *SourceMgr = nullptr;
 };
 
@@ -153,15 +151,11 @@ ParsedAST::Build(std::unique_ptr<clang::
     return llvm::None;
   }
 
-  std::vector<Inclusion> Inclusions;
   // Copy over the includes from the preamble, then combine with the
   // non-preamble includes below.
-  if (Preamble)
-    Inclusions = Preamble->Inclusions;
-
-  Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback(
-      Clang->getSourceManager(),
-      [&Inclusions](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); }));
+  auto Includes = Preamble ? Preamble->Includes : IncludeStructure{};
+  Clang->getPreprocessor().addPPCallbacks(
+      collectIncludeStructureCallback(Clang->getSourceManager(), &Includes));
 
   if (!Action->Execute())
     log("Execute() failed when building AST for " + MainInput.getFile());
@@ -179,7 +173,7 @@ ParsedAST::Build(std::unique_ptr<clang::
     Diags.insert(Diags.begin(), Preamble->Diags.begin(), Preamble->Diags.end());
   return ParsedAST(std::move(Preamble), std::move(Clang), std::move(Action),
                    std::move(ParsedDecls), std::move(Diags),
-                   std::move(Inclusions));
+                   std::move(Includes));
 }
 
 ParsedAST::ParsedAST(ParsedAST &&Other) = default;
@@ -246,25 +240,24 @@ std::size_t ParsedAST::getUsedBytes() co
   return Total;
 }
 
-const std::vector<Inclusion> &ParsedAST::getInclusions() const {
-  return Inclusions;
+const IncludeStructure &ParsedAST::getIncludeStructure() const {
+  return Includes;
 }
 
 PreambleData::PreambleData(PrecompiledPreamble Preamble,
-                           std::vector<Diag> Diags,
-                           std::vector<Inclusion> Inclusions)
+                           std::vector<Diag> Diags, IncludeStructure Includes)
     : Preamble(std::move(Preamble)), Diags(std::move(Diags)),
-      Inclusions(std::move(Inclusions)) {}
+      Includes(std::move(Includes)) {}
 
 ParsedAST::ParsedAST(std::shared_ptr<const PreambleData> Preamble,
                      std::unique_ptr<CompilerInstance> Clang,
                      std::unique_ptr<FrontendAction> Action,
                      std::vector<Decl *> LocalTopLevelDecls,
-                     std::vector<Diag> Diags, std::vector<Inclusion> Inclusions)
+                     std::vector<Diag> Diags, IncludeStructure Includes)
     : Preamble(std::move(Preamble)), Clang(std::move(Clang)),
       Action(std::move(Action)), Diags(std::move(Diags)),
       LocalTopLevelDecls(std::move(LocalTopLevelDecls)),
-      Inclusions(std::move(Inclusions)) {
+      Includes(std::move(Includes)) {
   assert(this->Clang);
   assert(this->Action);
 }
@@ -350,7 +343,7 @@ std::shared_ptr<const PreambleData> clan
         " for file " + Twine(FileName));
     return std::make_shared<PreambleData>(
         std::move(*BuiltPreamble), PreambleDiagnostics.take(),
-        SerializedDeclsCollector.takeInclusions());
+        SerializedDeclsCollector.takeIncludes());
   } else {
     log("Could not build a preamble for file " + Twine(FileName));
     return nullptr;

Modified: clang-tools-extra/trunk/clangd/ClangdUnit.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/ClangdUnit.h?rev=336177&r1=336176&r2=336177&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/ClangdUnit.h (original)
+++ clang-tools-extra/trunk/clangd/ClangdUnit.h Tue Jul  3 01:09:29 2018
@@ -45,14 +45,14 @@ namespace clangd {
 // Stores Preamble and associated data.
 struct PreambleData {
   PreambleData(PrecompiledPreamble Preamble, std::vector<Diag> Diags,
-               std::vector<Inclusion> Inclusions);
+               IncludeStructure Includes);
 
   tooling::CompileCommand CompileCommand;
   PrecompiledPreamble Preamble;
   std::vector<Diag> Diags;
   // Processes like code completions and go-to-definitions will need #include
   // information, and their compile action skips preamble range.
-  std::vector<Inclusion> Inclusions;
+  IncludeStructure Includes;
 };
 
 /// Information required to run clang, e.g. to parse AST or do code completion.
@@ -99,14 +99,14 @@ public:
   /// Returns the esitmated size of the AST and the accessory structures, in
   /// bytes. Does not include the size of the preamble.
   std::size_t getUsedBytes() const;
-  const std::vector<Inclusion> &getInclusions() const;
+  const IncludeStructure &getIncludeStructure() const;
 
 private:
   ParsedAST(std::shared_ptr<const PreambleData> Preamble,
             std::unique_ptr<CompilerInstance> Clang,
             std::unique_ptr<FrontendAction> Action,
             std::vector<Decl *> LocalTopLevelDecls, std::vector<Diag> Diags,
-            std::vector<Inclusion> Inclusions);
+            IncludeStructure Includes);
 
   // In-memory preambles must outlive the AST, it is important that this member
   // goes before Clang and Action.
@@ -124,7 +124,7 @@ private:
   // Top-level decls inside the current file. Not that this does not include
   // top-level decls from the preamble.
   std::vector<Decl *> LocalTopLevelDecls;
-  std::vector<Inclusion> Inclusions;
+  IncludeStructure Includes;
 };
 
 using PreambleParsedCallback = std::function<void(

Modified: clang-tools-extra/trunk/clangd/CodeComplete.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CodeComplete.cpp?rev=336177&r1=336176&r2=336177&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/CodeComplete.cpp (original)
+++ clang-tools-extra/trunk/clangd/CodeComplete.cpp Tue Jul  3 01:09:29 2018
@@ -22,6 +22,7 @@
 #include "AST.h"
 #include "CodeCompletionStrings.h"
 #include "Compiler.h"
+#include "FileDistance.h"
 #include "FuzzyMatch.h"
 #include "Headers.h"
 #include "Logger.h"
@@ -763,7 +764,6 @@ struct SemaCompleteInput {
   PathRef FileName;
   const tooling::CompileCommand &Command;
   PrecompiledPreamble const *Preamble;
-  const std::vector<Inclusion> &PreambleInclusions;
   StringRef Contents;
   Position Pos;
   IntrusiveRefCntPtr<vfs::FileSystem> VFS;
@@ -771,12 +771,11 @@ struct SemaCompleteInput {
 };
 
 // Invokes Sema code completion on a file.
-// If \p Includes is set, it will be initialized after a compiler instance has
-// been set up.
+// If \p Includes is set, it will be updated based on the compiler invocation.
 bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
                       const clang::CodeCompleteOptions &Options,
                       const SemaCompleteInput &Input,
-                      std::unique_ptr<IncludeInserter> *Includes = nullptr) {
+                      IncludeStructure *Includes = nullptr) {
   trace::Span Tracer("Sema completion");
   std::vector<const char *> ArgStrs;
   for (const auto &S : Input.Command.CommandLine)
@@ -837,29 +836,9 @@ bool semaCodeComplete(std::unique_ptr<Co
         Input.FileName);
     return false;
   }
-  if (Includes) {
-    // Initialize Includes if provided.
-
-    // FIXME(ioeric): needs more consistent style support in clangd server.
-    auto Style = format::getStyle(format::DefaultFormatStyle, Input.FileName,
-                                  format::DefaultFallbackStyle, Input.Contents,
-                                  Input.VFS.get());
-    if (!Style) {
-      log("ERROR: failed to get FormatStyle for file " + Input.FileName +
-          ". Fall back to use LLVM style. Error: " +
-          llvm::toString(Style.takeError()));
-      Style = format::getLLVMStyle();
-    }
-    *Includes = llvm::make_unique<IncludeInserter>(
-        Input.FileName, Input.Contents, *Style, Input.Command.Directory,
-        Clang->getPreprocessor().getHeaderSearchInfo());
-    for (const auto &Inc : Input.PreambleInclusions)
-      Includes->get()->addExisting(Inc);
-    Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback(
-        Clang->getSourceManager(), [Includes](Inclusion Inc) {
-          Includes->get()->addExisting(std::move(Inc));
-        }));
-  }
+  if (Includes)
+    Clang->getPreprocessor().addPPCallbacks(
+        collectIncludeStructureCallback(Clang->getSourceManager(), Includes));
   if (!Action.Execute()) {
     log("Execute() failed when running codeComplete for " + Input.FileName);
     return false;
@@ -949,24 +928,23 @@ clang::CodeCompleteOptions CodeCompleteO
 //   - TopN determines the results with the best score.
 class CodeCompleteFlow {
   PathRef FileName;
+  IncludeStructure Includes; // Complete once the compiler runs.
   const CodeCompleteOptions &Opts;
   // Sema takes ownership of Recorder. Recorder is valid until Sema cleanup.
   CompletionRecorder *Recorder = nullptr;
   int NSema = 0, NIndex = 0, NBoth = 0; // Counters for logging.
   bool Incomplete = false; // Would more be available with a higher limit?
   llvm::Optional<FuzzyMatcher> Filter;       // Initialized once Sema runs.
-  std::unique_ptr<IncludeInserter> Includes; // Initialized once compiler runs.
-  FileProximityMatcher FileProximityMatch;
+  // Include-insertion and proximity scoring rely on the include structure.
+  // This is available after Sema has run.
+  llvm::Optional<IncludeInserter> Inserter;  // Available during runWithSema.
+  llvm::Optional<URIDistance> FileProximity; // Initialized once Sema runs.
 
 public:
   // A CodeCompleteFlow object is only useful for calling run() exactly once.
-  CodeCompleteFlow(PathRef FileName, const CodeCompleteOptions &Opts)
-      : FileName(FileName), Opts(Opts),
-        // FIXME: also use path of the main header corresponding to FileName to
-        // calculate the file proximity, which would capture include/ and src/
-        // project setup where headers and implementations are not in the same
-        // directory.
-        FileProximityMatch(ArrayRef<StringRef>({FileName})) {}
+  CodeCompleteFlow(PathRef FileName, const IncludeStructure &Includes,
+                   const CodeCompleteOptions &Opts)
+      : FileName(FileName), Includes(Includes), Opts(Opts) {}
 
   CodeCompleteResult run(const SemaCompleteInput &SemaCCInput) && {
     trace::Span Tracer("CodeCompleteFlow");
@@ -977,11 +955,45 @@ public:
     CodeCompleteResult Output;
     auto RecorderOwner = llvm::make_unique<CompletionRecorder>(Opts, [&]() {
       assert(Recorder && "Recorder is not set");
-      assert(Includes && "Includes is not set");
+      // FIXME(ioeric): needs more consistent style support in clangd server.
+      auto Style =
+          format::getStyle("file", SemaCCInput.FileName, "LLVM",
+                           SemaCCInput.Contents, SemaCCInput.VFS.get());
+      if (!Style) {
+        log("Failed to get FormatStyle for file" + SemaCCInput.FileName + ": " +
+            llvm::toString(Style.takeError()) + ". Fallback is LLVM style.");
+        Style = format::getLLVMStyle();
+      }
       // If preprocessor was run, inclusions from preprocessor callback should
-      // already be added to Inclusions.
+      // already be added to Includes.
+      Inserter.emplace(
+          SemaCCInput.FileName, SemaCCInput.Contents, *Style,
+          SemaCCInput.Command.Directory,
+          Recorder->CCSema->getPreprocessor().getHeaderSearchInfo());
+      for (const auto &Inc : Includes.MainFileIncludes)
+        Inserter->addExisting(Inc);
+
+      // Most of the cost of file proximity is in initializing the FileDistance
+      // structures based on the observed includes, once per query. Conceptually
+      // that happens here (though the per-URI-scheme initialization is lazy).
+      // The per-result proximity scoring is (amortized) very cheap.
+      FileDistanceOptions ProxOpts{}; // Use defaults.
+      const auto &SM = Recorder->CCSema->getSourceManager();
+      llvm::StringMap<SourceParams> ProxSources;
+      for (auto &Entry : Includes.includeDepth(
+               SM.getFileEntryForID(SM.getMainFileID())->getName())) {
+        auto &Source = ProxSources[Entry.getKey()];
+        Source.Cost = Entry.getValue() * ProxOpts.IncludeCost;
+        // Symbols near our transitive includes are good, but only consider
+        // things in the same directory or below it. Otherwise there can be
+        // many false positives.
+        if (Entry.getValue() > 0)
+          Source.MaxUpTraversals = 1;
+      }
+      FileProximity.emplace(ProxSources, ProxOpts);
+
       Output = runWithSema();
-      Includes.reset(); // Make sure this doesn't out-live Clang.
+      Inserter.reset(); // Make sure this doesn't out-live Clang.
       SPAN_ATTACH(Tracer, "sema_completion_kind",
                   getCompletionKindString(Recorder->CCContext.getKind()));
     });
@@ -1044,6 +1056,7 @@ private:
     Req.RestrictForCodeCompletion = true;
     Req.Scopes = getQueryScopes(Recorder->CCContext,
                                 Recorder->CCSema->getSourceManager());
+    // FIXME: we should send multiple weighted paths here.
     Req.ProximityPaths.push_back(FileName);
     log(llvm::formatv("Code complete: fuzzyFind(\"{0}\", scopes=[{1}])",
                       Req.Query,
@@ -1124,7 +1137,7 @@ private:
     SymbolQualitySignals Quality;
     SymbolRelevanceSignals Relevance;
     Relevance.Query = SymbolRelevanceSignals::CodeComplete;
-    Relevance.FileProximityMatch = &FileProximityMatch;
+    Relevance.FileProximityMatch = FileProximity.getPointer();
     auto &First = Bundle.front();
     if (auto FuzzyScore = fuzzyScore(First))
       Relevance.NameMatch = *FuzzyScore;
@@ -1174,7 +1187,7 @@ private:
                           : nullptr;
       if (!Builder)
         Builder.emplace(Recorder->CCSema->getASTContext(), Item, SemaCCS,
-                        *Includes, FileName, Opts);
+                        *Inserter, FileName, Opts);
       else
         Builder->add(Item, SemaCCS);
     }
@@ -1182,15 +1195,16 @@ private:
   }
 };
 
-CodeCompleteResult codeComplete(
-    PathRef FileName, const tooling::CompileCommand &Command,
-    PrecompiledPreamble const *Preamble,
-    const std::vector<Inclusion> &PreambleInclusions, StringRef Contents,
-    Position Pos, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
-    std::shared_ptr<PCHContainerOperations> PCHs, CodeCompleteOptions Opts) {
-  return CodeCompleteFlow(FileName, Opts)
-      .run({FileName, Command, Preamble, PreambleInclusions, Contents, Pos, VFS,
-            PCHs});
+CodeCompleteResult codeComplete(PathRef FileName,
+                                const tooling::CompileCommand &Command,
+                                PrecompiledPreamble const *Preamble,
+                                const IncludeStructure &PreambleInclusions,
+                                StringRef Contents, Position Pos,
+                                IntrusiveRefCntPtr<vfs::FileSystem> VFS,
+                                std::shared_ptr<PCHContainerOperations> PCHs,
+                                CodeCompleteOptions Opts) {
+  return CodeCompleteFlow(FileName, PreambleInclusions, Opts)
+      .run({FileName, Command, Preamble, Contents, Pos, VFS, PCHs});
 }
 
 SignatureHelp signatureHelp(PathRef FileName,
@@ -1205,11 +1219,11 @@ SignatureHelp signatureHelp(PathRef File
   Options.IncludeMacros = false;
   Options.IncludeCodePatterns = false;
   Options.IncludeBriefComments = false;
-  std::vector<Inclusion> PreambleInclusions = {}; // Unused for signatureHelp
+  IncludeStructure PreambleInclusions; // Unused for signatureHelp
   semaCodeComplete(llvm::make_unique<SignatureHelpCollector>(Options, Result),
                    Options,
-                   {FileName, Command, Preamble, PreambleInclusions, Contents,
-                    Pos, std::move(VFS), std::move(PCHs)});
+                   {FileName, Command, Preamble, Contents, Pos, std::move(VFS),
+                    std::move(PCHs)});
   return Result;
 }
 

Modified: clang-tools-extra/trunk/clangd/CodeComplete.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/CodeComplete.h?rev=336177&r1=336176&r2=336177&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/CodeComplete.h (original)
+++ clang-tools-extra/trunk/clangd/CodeComplete.h Tue Jul  3 01:09:29 2018
@@ -144,12 +144,14 @@ struct CodeCompleteResult {
 raw_ostream &operator<<(raw_ostream &, const CodeCompleteResult &);
 
 /// Get code completions at a specified \p Pos in \p FileName.
-CodeCompleteResult codeComplete(
-    PathRef FileName, const tooling::CompileCommand &Command,
-    PrecompiledPreamble const *Preamble,
-    const std::vector<Inclusion> &PreambleInclusions, StringRef Contents,
-    Position Pos, IntrusiveRefCntPtr<vfs::FileSystem> VFS,
-    std::shared_ptr<PCHContainerOperations> PCHs, CodeCompleteOptions Opts);
+CodeCompleteResult codeComplete(PathRef FileName,
+                                const tooling::CompileCommand &Command,
+                                PrecompiledPreamble const *Preamble,
+                                const IncludeStructure &PreambleInclusions,
+                                StringRef Contents, Position Pos,
+                                IntrusiveRefCntPtr<vfs::FileSystem> VFS,
+                                std::shared_ptr<PCHContainerOperations> PCHs,
+                                CodeCompleteOptions Opts);
 
 /// Get signature help at a specified \p Pos in \p FileName.
 SignatureHelp signatureHelp(PathRef FileName,

Added: clang-tools-extra/trunk/clangd/FileDistance.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/FileDistance.cpp?rev=336177&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/FileDistance.cpp (added)
+++ clang-tools-extra/trunk/clangd/FileDistance.cpp Tue Jul  3 01:09:29 2018
@@ -0,0 +1,173 @@
+//===--- FileDistance.cpp - File contents container -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The FileDistance structure allows calculating the minimum distance to paths
+// in a single tree.
+// We simply walk up the path's ancestors until we find a node whose cost is
+// known, and add the cost of walking back down. Initialization ensures this
+// gives the correct path to the roots.
+// We cache the results, so that the runtime is O(|A|), where A is the set of
+// all distinct ancestors of visited paths.
+//
+// Example after initialization with /=2, /bar=0, DownCost = 1:
+//  / = 2
+//    /bar = 0
+//
+// After querying /foo/bar and /bar/foo:
+//  / = 2
+//    /bar = 0
+//      /bar/foo = 1
+//    /foo = 3
+//      /foo/bar = 4
+//
+// URIDistance creates FileDistance lazily for each URI scheme encountered. In
+// practice this is a small constant factor.
+//
+//===-------------------------------------------------------------------------//
+
+#include "FileDistance.h"
+#include "Logger.h"
+#include "llvm/ADT/STLExtras.h"
+#include <queue>
+#define DEBUG_TYPE "FileDistance"
+
+namespace clang {
+namespace clangd {
+using namespace llvm;
+
+// Convert a path into the canonical form.
+// Canonical form is either "/", or "/segment" * N:
+//   C:\foo\bar --> /c:/foo/bar
+//   /foo/      --> /foo
+//   a/b/c      --> /a/b/c
+static SmallString<128> canonicalize(StringRef Path) {
+  SmallString<128> Result = Path.rtrim('/');
+  native(Result, sys::path::Style::posix);
+  if (Result.empty() || Result.front() != '/')
+    Result.insert(Result.begin(), '/');
+  return Result;
+}
+
+const unsigned FileDistance::kUnreachable;
+
+FileDistance::FileDistance(StringMap<SourceParams> Sources,
+                           const FileDistanceOptions &Opts)
+    : Opts(Opts) {
+  llvm::DenseMap<hash_code, SmallVector<hash_code, 4>> DownEdges;
+  // Compute the best distance following only up edges.
+  // Keep track of down edges, in case we can use them to improve on this.
+  for (const auto &S : Sources) {
+    auto Canonical = canonicalize(S.getKey());
+    LLVM_DEBUG(dbgs() << "Source " << Canonical << " = " << S.second.Cost
+                      << ", MaxUp=" << S.second.MaxUpTraversals << "\n");
+    // Walk up to ancestors of this source, assigning cost.
+    StringRef Rest = Canonical;
+    llvm::hash_code Hash = hash_value(Rest);
+    for (unsigned I = 0; !Rest.empty(); ++I) {
+      Rest = parent_path(Rest, sys::path::Style::posix);
+      auto NextHash = hash_value(Rest);
+      DownEdges[NextHash].push_back(Hash);
+      // We can't just break after MaxUpTraversals, must still set DownEdges.
+      if (I > S.getValue().MaxUpTraversals) {
+        if (Cache.find(Hash) != Cache.end())
+          break;
+      } else {
+        unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
+        auto R = Cache.try_emplace(Hash, Cost);
+        if (!R.second) {
+          if (Cost < R.first->second) {
+            R.first->second = Cost;
+          } else {
+            // If we're not the best way to get to this path, stop assigning.
+            break;
+          }
+        }
+      }
+      Hash = NextHash;
+    }
+  }
+  // Now propagate scores parent -> child if that's an improvement.
+  // BFS ensures we propagate down chains (must visit parents before children).
+  std::queue<hash_code> Next;
+  for (auto Child : DownEdges.lookup(hash_value(llvm::StringRef(""))))
+    Next.push(Child);
+  while (!Next.empty()) {
+    auto ParentCost = Cache.lookup(Next.front());
+    for (auto Child : DownEdges.lookup(Next.front())) {
+      auto &ChildCost =
+          Cache.try_emplace(Child, kUnreachable).first->getSecond();
+      if (ParentCost + Opts.DownCost < ChildCost)
+        ChildCost = ParentCost + Opts.DownCost;
+      Next.push(Child);
+    }
+    Next.pop();
+  }
+}
+
+unsigned FileDistance::distance(StringRef Path) {
+  auto Canonical = canonicalize(Path);
+  unsigned Cost = kUnreachable;
+  SmallVector<hash_code, 16> Ancestors;
+  // Walk up ancestors until we find a path we know the distance for.
+  for (StringRef Rest = Canonical; !Rest.empty();
+       Rest = parent_path(Rest, sys::path::Style::posix)) {
+    auto Hash = hash_value(Rest);
+    auto It = Cache.find(Hash);
+    if (It != Cache.end()) {
+      Cost = It->second;
+      break;
+    }
+    Ancestors.push_back(Hash);
+  }
+  // Now we know the costs for (known node, queried node].
+  // Fill these in, walking down the directory tree.
+  for (hash_code Hash : reverse(Ancestors)) {
+    if (Cost != kUnreachable)
+      Cost += Opts.DownCost;
+    Cache.try_emplace(Hash, Cost);
+  }
+  LLVM_DEBUG(dbgs() << "distance(" << Path << ") = " << Cost << "\n");
+  return Cost;
+}
+
+unsigned URIDistance::distance(llvm::StringRef URI) {
+  auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::kUnreachable);
+  if (!R.second)
+    return R.first->getSecond();
+  if (auto U = clangd::URI::parse(URI)) {
+    LLVM_DEBUG(dbgs() << "distance(" << URI << ") = distance(" << U->body()
+                      << ")\n");
+    R.first->second = forScheme(U->scheme()).distance(U->body());
+  } else {
+    log("URIDistance::distance() of unparseable " + URI + ": " +
+        llvm::toString(U.takeError()));
+  }
+  return R.first->second;
+}
+
+FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
+  auto &Delegate = ByScheme[Scheme];
+  if (!Delegate) {
+    llvm::StringMap<SourceParams> SchemeSources;
+    for (const auto &Source : Sources) {
+      if (auto U = clangd::URI::create(Source.getKey(), Scheme))
+        SchemeSources.try_emplace(U->body(), Source.getValue());
+      else
+        consumeError(U.takeError());
+    }
+    LLVM_DEBUG(dbgs() << "FileDistance for scheme " << Scheme << ": "
+                      << SchemeSources.size() << "/" << Sources.size()
+                      << " sources\n");
+    Delegate.reset(new FileDistance(std::move(SchemeSources), Opts));
+  }
+  return *Delegate;
+}
+
+} // namespace clangd
+} // namespace clang

Added: clang-tools-extra/trunk/clangd/FileDistance.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/FileDistance.h?rev=336177&view=auto
==============================================================================
--- clang-tools-extra/trunk/clangd/FileDistance.h (added)
+++ clang-tools-extra/trunk/clangd/FileDistance.h Tue Jul  3 01:09:29 2018
@@ -0,0 +1,109 @@
+//===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library measures the distance between file paths.
+// It's used for ranking symbols, e.g. in code completion.
+//  |foo/bar.h -> foo/bar.h| = 0.
+//  |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|.
+// This is an edit-distance, where edits go up or down the directory tree.
+// It's not symmetrical, the costs of going up and down may not match.
+//
+// Dealing with multiple sources:
+// In practice we care about the distance from a source file, but files near
+// its main-header and #included files are considered "close".
+// So we start with a set of (anchor, cost) pairs, and call the distance to a
+// path the minimum of `cost + |source -> path|`.
+//
+// We allow each source to limit the number of up-traversals paths may start
+// with. Up-traversals may reach things that are not "semantically near".
+//
+// Symbol URI schemes:
+// Symbol locations may be represented by URIs rather than file paths directly.
+// In this case we want to perform distance computations in URI space rather
+// than in file-space, without performing redundant conversions.
+// Therefore we have a lookup structure that accepts URIs, so that intermediate
+// calculations for the same scheme can be reused.
+//
+// Caveats:
+// Assuming up and down traversals each have uniform costs is simplistic.
+// Often there are "semantic roots" whose children are almost unrelated.
+// (e.g. /usr/include/, or / in an umbrella repository). We ignore this.
+//
+//===----------------------------------------------------------------------===//
+
+#include "URI.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/StringSaver.h"
+
+namespace clang {
+namespace clangd {
+
+struct FileDistanceOptions {
+  unsigned UpCost = 2;      // |foo/bar.h -> foo|
+  unsigned DownCost = 1;    // |foo -> foo/bar.h|
+  unsigned IncludeCost = 2; // |foo.cc -> included_header.h|
+};
+
+struct SourceParams {
+  // Base cost for paths starting at this source.
+  unsigned Cost = 0;
+  // Limits the number of upwards traversals allowed from this source.
+  unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max();
+};
+
+// Supports lookups to find the minimum distance to a file from any source.
+// This object should be reused, it memoizes intermediate computations.
+class FileDistance {
+public:
+  static constexpr unsigned kUnreachable = std::numeric_limits<unsigned>::max();
+
+  FileDistance(llvm::StringMap<SourceParams> Sources,
+               const FileDistanceOptions &Opts = {});
+
+  // Computes the minimum distance from any source to the file path.
+  unsigned distance(llvm::StringRef Path);
+
+private:
+  // Costs computed so far. Always contains sources and their ancestors.
+  // We store hash codes only. Collisions are rare and consequences aren't dire.
+  llvm::DenseMap<llvm::hash_code, unsigned> Cache;
+  FileDistanceOptions Opts;
+};
+
+// Supports lookups like FileDistance, but the lookup keys are URIs.
+// We convert each of the sources to the scheme of the URI and do a FileDistance
+// comparison on the bodies.
+class URIDistance {
+public:
+  URIDistance(llvm::StringMap<SourceParams> Sources,
+              const FileDistanceOptions &Opts = {})
+      : Sources(Sources), Opts(Opts) {}
+
+  // Computes the minimum distance from any source to the URI.
+  // Only sources that can be mapped into the URI's scheme are considered.
+  unsigned distance(llvm::StringRef URI);
+
+private:
+  // Returns the FileDistance for a URI scheme, creating it if needed.
+  FileDistance &forScheme(llvm::StringRef Scheme);
+
+  // We cache the results using the original strings so we can skip URI parsing.
+  llvm::DenseMap<llvm::hash_code, unsigned> Cache;
+  llvm::StringMap<SourceParams> Sources;
+  llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme;
+  FileDistanceOptions Opts;
+};
+
+} // namespace clangd
+} // namespace clang

Modified: clang-tools-extra/trunk/clangd/Headers.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/Headers.cpp?rev=336177&r1=336176&r2=336177&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/Headers.cpp (original)
+++ clang-tools-extra/trunk/clangd/Headers.cpp Tue Jul  3 01:09:29 2018
@@ -23,9 +23,8 @@ namespace {
 
 class RecordHeaders : public PPCallbacks {
 public:
-  RecordHeaders(const SourceManager &SM,
-                std::function<void(Inclusion)> Callback)
-      : SM(SM), Callback(std::move(Callback)) {}
+  RecordHeaders(const SourceManager &SM, IncludeStructure *Out)
+      : SM(SM), Out(Out) {}
 
   // Record existing #includes - both written and resolved paths. Only #includes
   // in the main file are collected.
@@ -36,21 +35,28 @@ public:
                           llvm::StringRef /*RelativePath*/,
                           const Module * /*Imported*/,
                           SrcMgr::CharacteristicKind /*FileType*/) override {
-    // Only inclusion directives in the main file make sense. The user cannot
-    // select directives not in the main file.
-    if (HashLoc.isInvalid() || !SM.isInMainFile(HashLoc))
-      return;
-    std::string Written =
-        (IsAngled ? "<" + FileName + ">" : "\"" + FileName + "\"").str();
-    std::string Resolved = (!File || File->tryGetRealPathName().empty())
-                               ? ""
-                               : File->tryGetRealPathName();
-    Callback({halfOpenToRange(SM, FilenameRange), Written, Resolved});
+    if (SM.isInMainFile(HashLoc))
+      Out->MainFileIncludes.push_back({
+          halfOpenToRange(SM, FilenameRange),
+          (IsAngled ? "<" + FileName + ">" : "\"" + FileName + "\"").str(),
+          File ? File->tryGetRealPathName() : "",
+      });
+    if (File) {
+      auto *IncludingFileEntry = SM.getFileEntryForID(SM.getFileID(HashLoc));
+      if (!IncludingFileEntry) {
+        assert(SM.getBufferName(HashLoc).startswith("<") &&
+               "Expected #include location to be a file or <built-in>");
+        // Treat as if included from the main file.
+        IncludingFileEntry = SM.getFileEntryForID(SM.getMainFileID());
+      }
+      Out->recordInclude(IncludingFileEntry->getName(), File->getName(),
+                         File->tryGetRealPathName());
+    }
   }
 
 private:
   const SourceManager &SM;
-  std::function<void(Inclusion)> Callback;
+  IncludeStructure *Out;
 };
 
 } // namespace
@@ -65,9 +71,59 @@ bool HeaderFile::valid() const {
 }
 
 std::unique_ptr<PPCallbacks>
-collectInclusionsInMainFileCallback(const SourceManager &SM,
-                                    std::function<void(Inclusion)> Callback) {
-  return llvm::make_unique<RecordHeaders>(SM, std::move(Callback));
+collectIncludeStructureCallback(const SourceManager &SM,
+                                IncludeStructure *Out) {
+  return llvm::make_unique<RecordHeaders>(SM, Out);
+}
+
+void IncludeStructure::recordInclude(llvm::StringRef IncludingName,
+                                     llvm::StringRef IncludedName,
+                                     llvm::StringRef IncludedRealName) {
+  auto Child = fileIndex(IncludedName);
+  if (!IncludedRealName.empty() && RealPathNames[Child].empty())
+    RealPathNames[Child] = IncludedRealName;
+  auto Parent = fileIndex(IncludingName);
+  IncludeChildren[Parent].push_back(Child);
+}
+
+unsigned IncludeStructure::fileIndex(llvm::StringRef Name) {
+  auto R = NameToIndex.try_emplace(Name, RealPathNames.size());
+  if (R.second)
+    RealPathNames.emplace_back();
+  return R.first->getValue();
+}
+
+llvm::StringMap<unsigned>
+IncludeStructure::includeDepth(llvm::StringRef Root) const {
+  // Include depth 0 is the main file only.
+  llvm::StringMap<unsigned> Result;
+  Result[Root] = 0;
+  std::vector<unsigned> CurrentLevel;
+  llvm::DenseSet<unsigned> Seen;
+  auto It = NameToIndex.find(Root);
+  if (It != NameToIndex.end()) {
+    CurrentLevel.push_back(It->second);
+    Seen.insert(It->second);
+  }
+
+  // Each round of BFS traversal finds the next depth level.
+  std::vector<unsigned> PreviousLevel;
+  for (unsigned Level = 1; !CurrentLevel.empty(); ++Level) {
+    PreviousLevel.clear();
+    PreviousLevel.swap(CurrentLevel);
+    for (const auto &Parent : PreviousLevel) {
+      for (const auto &Child : IncludeChildren.lookup(Parent)) {
+        if (Seen.insert(Child).second) {
+          CurrentLevel.push_back(Child);
+          const auto &Name = RealPathNames[Child];
+          // Can't include files if we don't have their real path.
+          if (!Name.empty())
+            Result[Name] = Level;
+        }
+      }
+    }
+  }
+  return Result;
 }
 
 /// FIXME(ioeric): we might not want to insert an absolute include path if the

Modified: clang-tools-extra/trunk/clangd/Headers.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/Headers.h?rev=336177&r1=336176&r2=336177&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/Headers.h (original)
+++ clang-tools-extra/trunk/clangd/Headers.h Tue Jul  3 01:09:29 2018
@@ -45,10 +45,47 @@ struct Inclusion {
   Path Resolved;       // Resolved path of included file. Empty if not resolved.
 };
 
+// Information captured about the inclusion graph in a translation unit.
+// This includes detailed information about the direct #includes, and summary
+// information about all transitive includes.
+//
+// It should be built incrementally with collectIncludeStructureCallback().
+// When we build the preamble, we capture and store its include structure along
+// with the preamble data. When we use the preamble, we can copy its
+// IncludeStructure and use another collectIncludeStructureCallback() to fill
+// in any non-preamble inclusions.
+class IncludeStructure {
+public:
+  std::vector<Inclusion> MainFileIncludes;
+
+  // Return all transitively reachable files, and their minimum include depth.
+  // All transitive includes (absolute paths), with their minimum include depth.
+  // Root --> 0, #included file --> 1, etc.
+  // Root is clang's name for a file, which may not be absolute.
+  // Usually it should be SM.getFileEntryForID(SM.getMainFileID())->getName().
+  llvm::StringMap<unsigned> includeDepth(llvm::StringRef Root) const;
+
+  // This updates IncludeDepth(), but not MainFileIncludes.
+  void recordInclude(llvm::StringRef IncludingName,
+                     llvm::StringRef IncludedName,
+                     llvm::StringRef IncludedRealName);
+
+private:
+  // Identifying files in a way that persists from preamble build to subsequent
+  // builds is surprisingly hard. FileID is unavailable in InclusionDirective(),
+  // and RealPathName and UniqueID are not preseved in the preamble.
+  // We use the FileEntry::Name, which is stable, interned into a "file index".
+  // The paths we want to expose are the RealPathName, so store those too.
+  std::vector<std::string> RealPathNames; // In file index order.
+  unsigned fileIndex(llvm::StringRef Name);
+  llvm::StringMap<unsigned> NameToIndex; // Values are file indexes.
+  // Maps a file's index to that of the files it includes.
+  llvm::DenseMap<unsigned, SmallVector<unsigned, 8>> IncludeChildren;
+};
+
 /// Returns a PPCallback that visits all inclusions in the main file.
 std::unique_ptr<PPCallbacks>
-collectInclusionsInMainFileCallback(const SourceManager &SM,
-                                    std::function<void(Inclusion)> Callback);
+collectIncludeStructureCallback(const SourceManager &SM, IncludeStructure *Out);
 
 // Calculates insertion edit for including a new header in a file.
 class IncludeInserter {

Modified: clang-tools-extra/trunk/clangd/Quality.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/Quality.cpp?rev=336177&r1=336176&r2=336177&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/Quality.cpp (original)
+++ clang-tools-extra/trunk/clangd/Quality.cpp Tue Jul  3 01:09:29 2018
@@ -7,17 +7,18 @@
 //
 //===---------------------------------------------------------------------===//
 #include "Quality.h"
-#include <cmath>
+#include "FileDistance.h"
 #include "URI.h"
 #include "index/Index.h"
 #include "clang/AST/ASTContext.h"
-#include "clang/Basic/CharInfo.h"
 #include "clang/AST/DeclVisitor.h"
+#include "clang/Basic/CharInfo.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cmath>
 
 namespace clang {
 namespace clangd {
@@ -187,60 +188,6 @@ raw_ostream &operator<<(raw_ostream &OS,
   return OS;
 }
 
-/// Calculates a proximity score from \p From and \p To, which are URI strings
-/// that have the same scheme. This does not parse URI. A URI (sans "<scheme>:")
-/// is split into chunks by '/' and each chunk is considered a file/directory.
-/// For example, "uri:///a/b/c" will be treated as /a/b/c
-static float uriProximity(StringRef From, StringRef To) {
-  auto SchemeSplitFrom = From.split(':');
-  auto SchemeSplitTo = To.split(':');
-  assert((SchemeSplitFrom.first == SchemeSplitTo.first) &&
-         "URIs must have the same scheme in order to compute proximity.");
-  auto Split = [](StringRef URIWithoutScheme) {
-    SmallVector<StringRef, 8> Split;
-    URIWithoutScheme.split(Split, '/', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
-    return Split;
-  };
-  SmallVector<StringRef, 8> Fs = Split(SchemeSplitFrom.second);
-  SmallVector<StringRef, 8> Ts = Split(SchemeSplitTo.second);
-  auto F = Fs.begin(), T = Ts.begin(), FE = Fs.end(), TE = Ts.end();
-  for (; F != FE && T != TE && *F == *T; ++F, ++T) {
-  }
-  // We penalize for traversing up and down from \p From to \p To but penalize
-  // less for traversing down because subprojects are more closely related than
-  // superprojects.
-  int UpDist = FE - F;
-  int DownDist = TE - T;
-  return std::pow(0.7, UpDist + DownDist/2);
-}
-
-FileProximityMatcher::FileProximityMatcher(ArrayRef<StringRef> ProximityPaths)
-    : ProximityPaths(ProximityPaths.begin(), ProximityPaths.end()) {}
-
-float FileProximityMatcher::uriProximity(StringRef SymbolURI) const {
-  float Score = 0;
-  if (!ProximityPaths.empty() && !SymbolURI.empty()) {
-    for (const auto &Path : ProximityPaths)
-      // Only calculate proximity score for two URIs with the same scheme so
-      // that the computation can be purely text-based and thus avoid expensive
-      // URI encoding/decoding.
-      if (auto U = URI::create(Path, SymbolURI.split(':').first)) {
-        Score = std::max(Score, clangd::uriProximity(U->toString(), SymbolURI));
-      } else {
-        llvm::consumeError(U.takeError());
-      }
-  }
-  return Score;
-}
-
-llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
-                              const FileProximityMatcher &M) {
-  OS << formatv("File proximity matcher: ");
-  OS << formatv("ProximityPaths[{0}]", llvm::join(M.ProximityPaths.begin(),
-                                                  M.ProximityPaths.end(), ","));
-  return OS;
-}
-
 static SymbolRelevanceSignals::AccessibleScope
 ComputeScope(const NamedDecl *D) {
   // Injected "Foo" within the class "Foo" has file scope, not class scope.
@@ -288,6 +235,15 @@ void SymbolRelevanceSignals::merge(const
     Scope = std::min(Scope, ComputeScope(SemaCCResult.Declaration));
 }
 
+static std::pair<float, unsigned> proximityScore(llvm::StringRef SymbolURI,
+                                                 URIDistance *D) {
+  if (!D || SymbolURI.empty())
+    return {0.f, 0u};
+  unsigned Distance = D->distance(SymbolURI);
+  // Assume approximately default options are used for sensible scoring.
+  return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance};
+}
+
 float SymbolRelevanceSignals::evaluate() const {
   float Score = 1;
 
@@ -296,11 +252,10 @@ float SymbolRelevanceSignals::evaluate()
 
   Score *= NameMatch;
 
-  float IndexProximityScore =
-      FileProximityMatch ? FileProximityMatch->uriProximity(SymbolURI) : 0;
   // Proximity scores are [0,1] and we translate them into a multiplier in the
-  // range from 1 to 2.
-  Score *= 1 + std::max(IndexProximityScore, SemaProximityScore);
+  // range from 1 to 3.
+  Score *= 1 + 2 * std::max(proximityScore(SymbolURI, FileProximityMatch).first,
+                            SemaProximityScore);
 
   // Symbols like local variables may only be referenced within their scope.
   // Conversely if we're in that scope, it's likely we'll reference them.
@@ -331,9 +286,9 @@ raw_ostream &operator<<(raw_ostream &OS,
   OS << formatv("\tForbidden: {0}\n", S.Forbidden);
   OS << formatv("\tSymbol URI: {0}\n", S.SymbolURI);
   if (S.FileProximityMatch) {
-    OS << "\tIndex proximity: "
-       << S.FileProximityMatch->uriProximity(S.SymbolURI) << " ("
-       << *S.FileProximityMatch << ")\n";
+    auto Score = proximityScore(S.SymbolURI, S.FileProximityMatch);
+    OS << formatv("\tIndex proximity: {0} (distance={1})\n", Score.first,
+                  Score.second);
   }
   OS << formatv("\tSema proximity: {0}\n", S.SemaProximityScore);
   OS << formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));

Modified: clang-tools-extra/trunk/clangd/Quality.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/Quality.h?rev=336177&r1=336176&r2=336177&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/Quality.h (original)
+++ clang-tools-extra/trunk/clangd/Quality.h Tue Jul  3 01:09:29 2018
@@ -38,6 +38,7 @@ namespace clang {
 class CodeCompletionResult;
 namespace clangd {
 struct Symbol;
+class URIDistance;
 
 // Signals structs are designed to be aggregated from 0 or more sources.
 // A default instance has neutral signals, and sources are merged into it.
@@ -69,15 +70,13 @@ struct SymbolQualitySignals {
 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
                               const SymbolQualitySignals &);
 
-class FileProximityMatcher;
-
 /// Attributes of a symbol-query pair that affect how much we like it.
 struct SymbolRelevanceSignals {
   /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
   float NameMatch = 1;
   bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
 
-  const FileProximityMatcher *FileProximityMatch = nullptr;
+  URIDistance *FileProximityMatch = nullptr;
   /// This is used to calculate proximity between the index symbol and the
   /// query.
   llvm::StringRef SymbolURI;
@@ -111,25 +110,6 @@ llvm::raw_ostream &operator<<(llvm::raw_
 /// Combine symbol quality and relevance into a single score.
 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
 
-class FileProximityMatcher {
-  public:
-    /// \p ProximityPaths are used to compute proximity scores from symbol's
-    /// declaring file. The best score will be used.
-    explicit FileProximityMatcher(
-        llvm::ArrayRef<llvm::StringRef> ProximityPaths);
-
-    /// Calculates the best proximity score from proximity paths to the symbol's
-    /// URI. Score is [0-1], 1 means \p SymbolURI exactly matches a proximity
-    /// path. When a path cannot be encoded into the same scheme as \p
-    /// SymbolURI, the proximity will be 0.
-    float uriProximity(llvm::StringRef SymbolURI) const;
-
-  private:
-    llvm::SmallVector<std::string, 2> ProximityPaths;
-    friend llvm::raw_ostream &operator<<(llvm::raw_ostream &,
-                                         const FileProximityMatcher &);
-};
-
 /// TopN<T> is a lossy container that preserves only the "best" N elements.
 template <typename T, typename Compare = std::greater<T>> class TopN {
 public:

Modified: clang-tools-extra/trunk/clangd/XRefs.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/clangd/XRefs.cpp?rev=336177&r1=336176&r2=336177&view=diff
==============================================================================
--- clang-tools-extra/trunk/clangd/XRefs.cpp (original)
+++ clang-tools-extra/trunk/clangd/XRefs.cpp Tue Jul  3 01:09:29 2018
@@ -230,13 +230,10 @@ llvm::Optional<SymbolID> getSymbolID(con
 std::vector<Location> findDefinitions(ParsedAST &AST, Position Pos,
                                       const SymbolIndex *Index) {
   const SourceManager &SourceMgr = AST.getASTContext().getSourceManager();
-  SourceLocation SourceLocationBeg =
-      getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID());
 
   std::vector<Location> Result;
   // Handle goto definition for #include.
-  for (auto &Inc : AST.getInclusions()) {
-    Position Pos = sourceLocToPosition(SourceMgr, SourceLocationBeg);
+  for (auto &Inc : AST.getIncludeStructure().MainFileIncludes) {
     if (!Inc.Resolved.empty() && Inc.R.contains(Pos))
       Result.push_back(Location{URIForFile{Inc.Resolved}, {}});
   }
@@ -244,6 +241,8 @@ std::vector<Location> findDefinitions(Pa
     return Result;
 
   // Identified symbols at a specific position.
+  SourceLocation SourceLocationBeg =
+      getBeginningOfIdentifier(AST, Pos, SourceMgr.getMainFileID());
   auto Symbols = getSymbolAtPosition(AST, SourceLocationBeg);
 
   for (auto Item : Symbols.Macros) {

Modified: clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt?rev=336177&r1=336176&r2=336177&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt (original)
+++ clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt Tue Jul  3 01:09:29 2018
@@ -17,6 +17,7 @@ add_extra_unittest(ClangdTests
   ContextTests.cpp
   DraftStoreTests.cpp
   FileIndexTests.cpp
+  FileDistanceTests.cpp
   FindSymbolsTests.cpp
   FuzzyMatchTests.cpp
   GlobalCompilationDatabaseTests.cpp

Added: clang-tools-extra/trunk/unittests/clangd/FileDistanceTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/FileDistanceTests.cpp?rev=336177&view=auto
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/FileDistanceTests.cpp (added)
+++ clang-tools-extra/trunk/unittests/clangd/FileDistanceTests.cpp Tue Jul  3 01:09:29 2018
@@ -0,0 +1,93 @@
+//===-- FileDistanceTests.cpp  ------------------------*- C++ -*-----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "FileDistance.h"
+#include "TestFS.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+TEST(FileDistanceTests, Distance) {
+  FileDistanceOptions Opts;
+  Opts.UpCost = 5;
+  Opts.DownCost = 3;
+  SourceParams CostTwo;
+  CostTwo.Cost = 2;
+  FileDistance D(
+      {{"tools/clang/lib/Format/FormatToken.cpp", SourceParams()},
+       {"tools/clang/include/clang/Format/FormatToken.h", SourceParams()},
+       {"include/llvm/ADT/StringRef.h", CostTwo}},
+      Opts);
+
+  // Source
+  EXPECT_EQ(D.distance("tools/clang/lib/Format/FormatToken.cpp"), 0u);
+  EXPECT_EQ(D.distance("include/llvm/ADT/StringRef.h"), 2u);
+  // Parent
+  EXPECT_EQ(D.distance("tools/clang/lib/Format/"), 5u);
+  // Child
+  EXPECT_EQ(D.distance("tools/clang/lib/Format/FormatToken.cpp/Oops"), 3u);
+  // Ancestor (up+up+up+up)
+  EXPECT_EQ(D.distance("/"), 22u);
+  // Sibling (up+down)
+  EXPECT_EQ(D.distance("tools/clang/lib/Format/AnotherFile.cpp"), 8u);
+  // Cousin (up+up+down+down)
+  EXPECT_EQ(D.distance("include/llvm/Support/Allocator.h"), 18u);
+  // First cousin, once removed (up+up+up+down+down)
+  EXPECT_EQ(D.distance("include/llvm-c/Core.h"), 23u);
+}
+
+TEST(FileDistanceTests, BadSource) {
+  // We mustn't assume that paths above sources are best reached via them.
+  FileDistanceOptions Opts;
+  Opts.UpCost = 5;
+  Opts.DownCost = 3;
+  SourceParams CostLots;
+  CostLots.Cost = 100;
+  FileDistance D({{"a", SourceParams()}, {"b/b/b", CostLots}}, Opts);
+  EXPECT_EQ(D.distance("b"), 8u);        // a+up+down, not b+up+up
+  EXPECT_EQ(D.distance("b/b/b"), 14u);   // a+up+down+down+down, not b
+  EXPECT_EQ(D.distance("b/b/b/c"), 17u); // a+up+down+down+down+down, not b+down
+}
+
+auto UseUnittestScheme = UnittestSchemeAnchorSource;
+
+TEST(FileDistanceTests, URI) {
+  FileDistanceOptions Opts;
+  Opts.UpCost = 5;
+  Opts.DownCost = 3;
+  SourceParams CostLots;
+  CostLots.Cost = 1000;
+
+  URIDistance D(
+      {{testPath("foo"), CostLots}, {"/not/a/testpath", SourceParams()}}, Opts);
+  EXPECT_EQ(D.distance("file:///not/a/testpath/either"), 3u);
+  EXPECT_EQ(D.distance("unittest:foo"), 1000u);
+  EXPECT_EQ(D.distance("unittest:bar"), 1008u);
+}
+
+TEST(FileDistance, LimitUpTraversals) {
+  FileDistanceOptions Opts;
+  Opts.UpCost = Opts.DownCost = 1;
+  SourceParams CheapButLimited, CostLots;
+  CheapButLimited.MaxUpTraversals = 1;
+  CostLots.Cost = 100;
+
+  FileDistance D({{"/", CostLots}, {"/a/b/c", CheapButLimited}}, Opts);
+  EXPECT_EQ(D.distance("/a"), 101u);
+  EXPECT_EQ(D.distance("/a/z"), 102u);
+  EXPECT_EQ(D.distance("/a/b"), 1u);
+  EXPECT_EQ(D.distance("/a/b/z"), 2u);
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang

Modified: clang-tools-extra/trunk/unittests/clangd/HeadersTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/HeadersTests.cpp?rev=336177&r1=336176&r2=336177&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/HeadersTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/HeadersTests.cpp Tue Jul  3 01:09:29 2018
@@ -64,18 +64,17 @@ private:
   }
 
 protected:
-  std::vector<Inclusion> collectIncludes() {
+  IncludeStructure collectIncludes() {
     auto Clang = setupClang();
     PreprocessOnlyAction Action;
     EXPECT_TRUE(
         Action.BeginSourceFile(*Clang, Clang->getFrontendOpts().Inputs[0]));
-    std::vector<Inclusion> Inclusions;
-    Clang->getPreprocessor().addPPCallbacks(collectInclusionsInMainFileCallback(
-        Clang->getSourceManager(),
-        [&](Inclusion Inc) { Inclusions.push_back(std::move(Inc)); }));
+    IncludeStructure Includes;
+    Clang->getPreprocessor().addPPCallbacks(
+        collectIncludeStructureCallback(Clang->getSourceManager(), &Includes));
     EXPECT_TRUE(Action.Execute());
     Action.EndSourceFile();
-    return Inclusions;
+    return Includes;
   }
 
   // Calculates the include path, or returns "" on error or header should not be
@@ -133,6 +132,14 @@ protected:
 MATCHER_P(Written, Name, "") { return arg.Written == Name; }
 MATCHER_P(Resolved, Name, "") { return arg.Resolved == Name; }
 
+MATCHER_P2(Distance, File, D, "") {
+  if (arg.getKey() != File)
+    *result_listener << "file =" << arg.getKey().str();
+  if (arg.getValue() != D)
+    *result_listener << "distance =" << arg.getValue();
+  return arg.getKey() == File && arg.getValue() == D;
+}
+
 TEST_F(HeadersTest, CollectRewrittenAndResolved) {
   FS.Files[MainFile] = R"cpp(
 #include "sub/bar.h" // not shortest
@@ -140,9 +147,12 @@ TEST_F(HeadersTest, CollectRewrittenAndR
   std::string BarHeader = testPath("sub/bar.h");
   FS.Files[BarHeader] = "";
 
-  EXPECT_THAT(collectIncludes(),
+  EXPECT_THAT(collectIncludes().MainFileIncludes,
               UnorderedElementsAre(
                   AllOf(Written("\"sub/bar.h\""), Resolved(BarHeader))));
+  EXPECT_THAT(collectIncludes().includeDepth(MainFile),
+              UnorderedElementsAre(Distance(MainFile, 0u),
+                                   Distance(testPath("sub/bar.h"), 1u)));
 }
 
 TEST_F(HeadersTest, OnlyCollectInclusionsInMain) {
@@ -156,8 +166,16 @@ TEST_F(HeadersTest, OnlyCollectInclusion
 #include "bar.h"
 )cpp";
   EXPECT_THAT(
-      collectIncludes(),
+      collectIncludes().MainFileIncludes,
       UnorderedElementsAre(AllOf(Written("\"bar.h\""), Resolved(BarHeader))));
+  EXPECT_THAT(collectIncludes().includeDepth(MainFile),
+              UnorderedElementsAre(Distance(MainFile, 0u),
+                                   Distance(testPath("sub/bar.h"), 1u),
+                                   Distance(testPath("sub/baz.h"), 2u)));
+  // includeDepth() also works for non-main files.
+  EXPECT_THAT(collectIncludes().includeDepth(testPath("sub/bar.h")),
+              UnorderedElementsAre(Distance(testPath("sub/bar.h"), 0u),
+                                   Distance(testPath("sub/baz.h"), 1u)));
 }
 
 TEST_F(HeadersTest, UnResolvedInclusion) {
@@ -165,8 +183,10 @@ TEST_F(HeadersTest, UnResolvedInclusion)
 #include "foo.h"
 )cpp";
 
-  EXPECT_THAT(collectIncludes(),
+  EXPECT_THAT(collectIncludes().MainFileIncludes,
               UnorderedElementsAre(AllOf(Written("\"foo.h\""), Resolved(""))));
+  EXPECT_THAT(collectIncludes().includeDepth(MainFile),
+              UnorderedElementsAre(Distance(MainFile, 0u)));
 }
 
 TEST_F(HeadersTest, InsertInclude) {

Modified: clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp?rev=336177&r1=336176&r2=336177&view=diff
==============================================================================
--- clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp (original)
+++ clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp Tue Jul  3 01:09:29 2018
@@ -17,6 +17,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "FileDistance.h"
 #include "Quality.h"
 #include "TestFS.h"
 #include "TestTU.h"
@@ -162,9 +163,22 @@ TEST(QualityTests, SymbolRelevanceSignal
   PoorNameMatch.NameMatch = 0.2f;
   EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate());
 
-  SymbolRelevanceSignals WithProximity;
-  WithProximity.SemaProximityScore = 0.2f;
-  EXPECT_GT(WithProximity.evaluate(), Default.evaluate());
+  SymbolRelevanceSignals WithSemaProximity;
+  WithSemaProximity.SemaProximityScore = 0.2f;
+  EXPECT_GT(WithSemaProximity.evaluate(), Default.evaluate());
+
+  SymbolRelevanceSignals IndexProximate;
+  IndexProximate.SymbolURI = "unittest:/foo/bar.h";
+  llvm::StringMap<SourceParams> ProxSources;
+  ProxSources.try_emplace(testPath("foo/baz.h"));
+  URIDistance Distance(ProxSources);
+  IndexProximate.FileProximityMatch = &Distance;
+  EXPECT_GT(IndexProximate.evaluate(), Default.evaluate());
+  SymbolRelevanceSignals IndexDistant = IndexProximate;
+  IndexDistant.SymbolURI = "unittest:/elsewhere/path.h";
+  EXPECT_GT(IndexProximate.evaluate(), IndexDistant.evaluate())
+      << IndexProximate << IndexDistant;
+  EXPECT_GT(IndexDistant.evaluate(), Default.evaluate());
 
   SymbolRelevanceSignals Scoped;
   Scoped.Scope = SymbolRelevanceSignals::FileScope;
@@ -185,59 +199,6 @@ TEST(QualityTests, SortText) {
   EXPECT_LT(sortText(0, "a"), sortText(0, "z"));
 }
 
-// {a,b,c} becomes /clangd-test/a/b/c
-std::string joinPaths(llvm::ArrayRef<StringRef> Parts) {
-  return testPath(
-      llvm::join(Parts.begin(), Parts.end(), llvm::sys::path::get_separator()));
-}
-
-static constexpr float ProximityBase = 0.7f;
-
-// Calculates a proximity score for an index symbol with declaration file
-// SymPath with the given URI scheme.
-float URIProximity(const FileProximityMatcher &Matcher, StringRef SymPath,
-                     StringRef Scheme = "file") {
-  auto U = URI::create(SymPath, Scheme);
-  EXPECT_TRUE(static_cast<bool>(U)) << llvm::toString(U.takeError());
-  return Matcher.uriProximity(U->toString());
-}
-
-TEST(QualityTests, URIProximityScores) {
-  FileProximityMatcher Matcher(
-      /*ProximityPaths=*/{joinPaths({"a", "b", "c", "d", "x"})});
-
-  EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "x"})),
-                  1);
-  EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "y"})),
-                  ProximityBase);
-  EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"a", "y", "z"})),
-                  std::pow(ProximityBase, 5));
-  EXPECT_FLOAT_EQ(
-      URIProximity(Matcher, joinPaths({"a", "b", "c", "d", "e", "y"})),
-      std::pow(ProximityBase, 2));
-  EXPECT_FLOAT_EQ(
-      URIProximity(Matcher, joinPaths({"a", "b", "m", "n", "o", "y"})),
-      std::pow(ProximityBase, 5));
-  EXPECT_FLOAT_EQ(
-      URIProximity(Matcher, joinPaths({"a", "t", "m", "n", "o", "y"})),
-      std::pow(ProximityBase, 6));
-  // Note the common directory is /clang-test/
-  EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "o", "p", "y"})),
-                  std::pow(ProximityBase, 7));
-}
-
-TEST(QualityTests, URIProximityScoresWithTestURI) {
-  FileProximityMatcher Matcher(
-      /*ProximityPaths=*/{joinPaths({"b", "c", "x"})});
-  EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "c", "x"}), "unittest"),
-                  1);
-  EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"b", "y"}), "unittest"),
-                  std::pow(ProximityBase, 2));
-  // unittest:///b/c/x vs unittest:///m/n/y. No common directory.
-  EXPECT_FLOAT_EQ(URIProximity(Matcher, joinPaths({"m", "n", "y"}), "unittest"),
-                  std::pow(ProximityBase, 4));
-}
-
 } // namespace
 } // namespace clangd
 } // namespace clang


_______________________________________________
cfe-commits mailing list
cfe-commits at lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


More information about the cfe-commits mailing list