[clang-tools-extra] r291664 - Improve include fixer's ranking by taking the paths into account.

Manuel Klimek via cfe-commits cfe-commits at lists.llvm.org
Wed Jan 11 02:32:47 PST 2017


Author: klimek
Date: Wed Jan 11 04:32:47 2017
New Revision: 291664

URL: http://llvm.org/viewvc/llvm-project?rev=291664&view=rev
Log:
Improve include fixer's ranking by taking the paths into account.

Instead of just using popularity, we also take into account how similar the
path of the current file is to the path of the header.
Our first approach is to get popularity into a reasonably small scale by taking
log2 (which is roughly intuitive to how humans would bucket popularity), and
multiply that with the number of matching prefix path fragments of the included
header with the current file.
Note that currently we do not take special care for unclean paths containing
"../" or "./".

Differential Revision: https://reviews.llvm.org/D28548

Modified:
    clang-tools-extra/trunk/include-fixer/IncludeFixer.cpp
    clang-tools-extra/trunk/include-fixer/SymbolIndexManager.cpp
    clang-tools-extra/trunk/include-fixer/SymbolIndexManager.h
    clang-tools-extra/trunk/include-fixer/tool/ClangIncludeFixer.cpp
    clang-tools-extra/trunk/test/include-fixer/Inputs/fake_yaml_db.yaml
    clang-tools-extra/trunk/test/include-fixer/ranking.cpp

Modified: clang-tools-extra/trunk/include-fixer/IncludeFixer.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/include-fixer/IncludeFixer.cpp?rev=291664&r1=291663&r2=291664&view=diff
==============================================================================
--- clang-tools-extra/trunk/include-fixer/IncludeFixer.cpp (original)
+++ clang-tools-extra/trunk/include-fixer/IncludeFixer.cpp Wed Jan 11 04:32:47 2017
@@ -365,6 +365,9 @@ IncludeFixerSemaSource::query(StringRef
             .getLocWithOffset(Range.getOffset())
             .print(llvm::dbgs(), CI->getSourceManager()));
   DEBUG(llvm::dbgs() << " ...");
+  llvm::StringRef FileName = CI->getSourceManager().getFilename(
+      CI->getSourceManager().getLocForStartOfFile(
+          CI->getSourceManager().getMainFileID()));
 
   QuerySymbolInfos.push_back({Query.str(), ScopedQualifiers, Range});
 
@@ -385,9 +388,10 @@ IncludeFixerSemaSource::query(StringRef
   // context, it might treat the identifier as a nested class of the scoped
   // namespace.
   std::vector<find_all_symbols::SymbolInfo> MatchedSymbols =
-      SymbolIndexMgr.search(QueryString, /*IsNestedSearch=*/false);
+      SymbolIndexMgr.search(QueryString, /*IsNestedSearch=*/false, FileName);
   if (MatchedSymbols.empty())
-    MatchedSymbols = SymbolIndexMgr.search(Query);
+    MatchedSymbols =
+        SymbolIndexMgr.search(Query, /*IsNestedSearch=*/true, FileName);
   DEBUG(llvm::dbgs() << "Having found " << MatchedSymbols.size()
                      << " symbols\n");
   // We store a copy of MatchedSymbols in a place where it's globally reachable.

Modified: clang-tools-extra/trunk/include-fixer/SymbolIndexManager.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/include-fixer/SymbolIndexManager.cpp?rev=291664&r1=291663&r2=291664&view=diff
==============================================================================
--- clang-tools-extra/trunk/include-fixer/SymbolIndexManager.cpp (original)
+++ clang-tools-extra/trunk/include-fixer/SymbolIndexManager.cpp Wed Jan 11 04:32:47 2017
@@ -12,6 +12,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Path.h"
 
 #define DEBUG_TYPE "include-fixer"
 
@@ -20,30 +21,57 @@ namespace include_fixer {
 
 using clang::find_all_symbols::SymbolInfo;
 
-/// Sorts SymbolInfos based on the popularity info in SymbolInfo.
-static void rankByPopularity(std::vector<SymbolInfo> &Symbols) {
-  // First collect occurrences per header file.
-  llvm::DenseMap<llvm::StringRef, unsigned> HeaderPopularity;
-  for (const SymbolInfo &Symbol : Symbols) {
-    unsigned &Popularity = HeaderPopularity[Symbol.getFilePath()];
-    Popularity = std::max(Popularity, Symbol.getNumOccurrences());
+// Calculate a score based on whether we think the given header is closely
+// related to the given source file.
+static double similarityScore(llvm::StringRef FileName,
+                              llvm::StringRef Header) {
+  // Compute the maximum number of common path segements between Header and
+  // a suffix of FileName.
+  // We do not do a full longest common substring computation, as Header
+  // specifies the path we would directly #include, so we assume it is rooted
+  // relatively to a subproject of the repository.
+  int MaxSegments = 1;
+  for (auto FileI = llvm::sys::path::begin(FileName),
+            FileE = llvm::sys::path::end(FileName);
+       FileI != FileE; ++FileI) {
+    int Segments = 0;
+    for (auto HeaderI = llvm::sys::path::begin(Header),
+              HeaderE = llvm::sys::path::end(Header), I = FileI;
+         HeaderI != HeaderE && *I == *HeaderI && I != FileE; ++I, ++HeaderI) {
+      ++Segments;
+    }
+    MaxSegments = std::max(Segments, MaxSegments);
   }
+  return MaxSegments;
+}
 
-  // Sort by the gathered popularities. Use file name as a tie breaker so we can
+static void rank(std::vector<SymbolInfo> &Symbols,
+                 llvm::StringRef FileName) {
+  llvm::DenseMap<llvm::StringRef, double> Score;
+  for (const SymbolInfo &Symbol : Symbols) {
+    // Calculate a score from the similarity of the header the symbol is in
+    // with the current file and the popularity of the symbol.
+    double NewScore = similarityScore(FileName, Symbol.getFilePath()) *
+                      (1.0 + std::log2(1 + Symbol.getNumOccurrences()));
+    double &S = Score[Symbol.getFilePath()];
+    S = std::max(S, NewScore);
+  }
+  // Sort by the gathered scores. Use file name as a tie breaker so we can
   // deduplicate.
   std::sort(Symbols.begin(), Symbols.end(),
             [&](const SymbolInfo &A, const SymbolInfo &B) {
-              auto APop = HeaderPopularity[A.getFilePath()];
-              auto BPop = HeaderPopularity[B.getFilePath()];
-              if (APop != BPop)
-                return APop > BPop;
+              auto AS = Score[A.getFilePath()];
+              auto BS = Score[B.getFilePath()];
+              if (AS != BS)
+                return AS > BS;
               return A.getFilePath() < B.getFilePath();
             });
 }
 
 std::vector<find_all_symbols::SymbolInfo>
 SymbolIndexManager::search(llvm::StringRef Identifier,
-                           bool IsNestedSearch) const {
+                           bool IsNestedSearch,
+                           llvm::StringRef FileName) const {
   // The identifier may be fully qualified, so split it and get all the context
   // names.
   llvm::SmallVector<llvm::StringRef, 8> Names;
@@ -119,7 +147,7 @@ SymbolIndexManager::search(llvm::StringR
     TookPrefix = true;
   } while (MatchedSymbols.empty() && !Names.empty() && IsNestedSearch);
 
-  rankByPopularity(MatchedSymbols);
+  rank(MatchedSymbols, FileName);
   return MatchedSymbols;
 }
 

Modified: clang-tools-extra/trunk/include-fixer/SymbolIndexManager.h
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/include-fixer/SymbolIndexManager.h?rev=291664&r1=291663&r2=291664&view=diff
==============================================================================
--- clang-tools-extra/trunk/include-fixer/SymbolIndexManager.h (original)
+++ clang-tools-extra/trunk/include-fixer/SymbolIndexManager.h Wed Jan 11 04:32:47 2017
@@ -42,7 +42,8 @@ public:
   ///
   /// \returns A list of symbol candidates.
   std::vector<find_all_symbols::SymbolInfo>
-  search(llvm::StringRef Identifier, bool IsNestedSearch = true) const;
+  search(llvm::StringRef Identifier, bool IsNestedSearch = true,
+         llvm::StringRef FileName = "") const;
 
 private:
   std::vector<std::shared_future<std::unique_ptr<SymbolIndex>>> SymbolIndices;

Modified: clang-tools-extra/trunk/include-fixer/tool/ClangIncludeFixer.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/include-fixer/tool/ClangIncludeFixer.cpp?rev=291664&r1=291663&r2=291664&view=diff
==============================================================================
--- clang-tools-extra/trunk/include-fixer/tool/ClangIncludeFixer.cpp (original)
+++ clang-tools-extra/trunk/include-fixer/tool/ClangIncludeFixer.cpp Wed Jan 11 04:32:47 2017
@@ -332,7 +332,8 @@ int includeFixerMain(int argc, const cha
 
   // Query symbol mode.
   if (!QuerySymbol.empty()) {
-    auto MatchedSymbols = SymbolIndexMgr->search(QuerySymbol);
+    auto MatchedSymbols = SymbolIndexMgr->search(
+        QuerySymbol, /*IsNestedSearch=*/true, SourceFilePath);
     for (auto &Symbol : MatchedSymbols) {
       std::string HeaderPath = Symbol.getFilePath().str();
       Symbol.SetFilePath(((HeaderPath[0] == '"' || HeaderPath[0] == '<')

Modified: clang-tools-extra/trunk/test/include-fixer/Inputs/fake_yaml_db.yaml
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/test/include-fixer/Inputs/fake_yaml_db.yaml?rev=291664&r1=291663&r2=291664&view=diff
==============================================================================
--- clang-tools-extra/trunk/test/include-fixer/Inputs/fake_yaml_db.yaml (original)
+++ clang-tools-extra/trunk/test/include-fixer/Inputs/fake_yaml_db.yaml Wed Jan 11 04:32:47 2017
@@ -9,7 +9,6 @@ FilePath:        foo.h
 LineNumber:      1
 Type:            Class
 NumOccurrences:  1
-...
 ---
 Name:           bar
 Contexts:
@@ -21,7 +20,7 @@ FilePath:        ../include/bar.h
 LineNumber:      1
 Type:            Class
 NumOccurrences:  1
-...
+---
 Name:           bar
 Contexts:
   - ContextType:     Namespace
@@ -32,7 +31,7 @@ FilePath:        ../include/bar.h
 LineNumber:      2
 Type:            Class
 NumOccurrences:  3
-...
+---
 Name:           bar
 Contexts:
   - ContextType:     Namespace
@@ -50,4 +49,12 @@ FilePath:        var.h
 LineNumber:      1
 Type:            Variable
 NumOccurrences:  1
-...
+---
+Name:            bar
+Contexts:
+  - ContextType:    Namespace
+    ContextName:    c
+FilePath:        test/include-fixer/baz.h
+LineNumber:      1
+Type:            Class
+NumOccurrences:  1

Modified: clang-tools-extra/trunk/test/include-fixer/ranking.cpp
URL: http://llvm.org/viewvc/llvm-project/clang-tools-extra/trunk/test/include-fixer/ranking.cpp?rev=291664&r1=291663&r2=291664&view=diff
==============================================================================
--- clang-tools-extra/trunk/test/include-fixer/ranking.cpp (original)
+++ clang-tools-extra/trunk/test/include-fixer/ranking.cpp Wed Jan 11 04:32:47 2017
@@ -1,6 +1,9 @@
 // RUN: clang-include-fixer -db=yaml -input=%S/Inputs/fake_yaml_db.yaml -output-headers %s -- | FileCheck %s
+// RUN: clang-include-fixer -query-symbol bar -db=yaml -input=%S/Inputs/fake_yaml_db.yaml -output-headers %s -- | FileCheck %s
 
 // CHECK:     "HeaderInfos": [
+// CHECK-NEXT:  {"Header": "\"test/include-fixer/baz.h\"",
+// CHECK-NEXT:   "QualifiedName": "c::bar"},
 // CHECK-NEXT:  {"Header": "\"../include/bar.h\"",
 // CHECK-NEXT:   "QualifiedName": "b::a::bar"},
 // CHECK-NEXT:  {"Header": "\"../include/zbar.h\"",




More information about the cfe-commits mailing list