[clang-tools-extra] 891f822 - [clangd] Implement range patching heuristics for cross-file rename.

Mon Dec 9 08:02:52 PST 2019

Author: Haojian Wu
Date: 2019-12-09T17:01:05+01:00
New Revision: 891f82222bb8436bfd8db0acfbd5f3621fa53425

URL: https://github.com/llvm/llvm-project/commit/891f82222bb8436bfd8db0acfbd5f3621fa53425
DIFF: https://github.com/llvm/llvm-project/commit/891f82222bb8436bfd8db0acfbd5f3621fa53425.diff

LOG: [clangd] Implement range patching heuristics for cross-file rename.

Reviewers: sammccall, ilya-biryukov

Reviewed By: sammccall

Subscribers: merge_guards_bot, MaskRay, jkorous, mgrang, arphaman, kadircet, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D70594

Added: 
    

Modified: 
    clang-tools-extra/clangd/refactor/Rename.cpp
    clang-tools-extra/clangd/refactor/Rename.h
    clang-tools-extra/clangd/unittests/RenameTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp
index 3f3c216c5909..103f59bc307c 100644

--- a/clang-tools-extra/clangd/refactor/Rename.cpp
+++ b/clang-tools-extra/clangd/refactor/Rename.cpp
@@ -18,9 +18,11 @@
 #include "clang/AST/DeclTemplate.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Tooling/Refactoring/Rename/USRFindingAction.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FormatVariadic.h"
+#include <algorithm>
 
 namespace clang {
 namespace clangd {
@@ -355,9 +357,22 @@ llvm::Expected<FileEdits> renameOutsideFile(
       elog("Fail to read file content: {0}", AffectedFileCode.takeError());
       continue;
     }
+    auto RenameRanges =
+        adjustRenameRanges(*AffectedFileCode, RenameDecl.getNameAsString(),
+                           std::move(FileAndOccurrences.second),
+                           RenameDecl.getASTContext().getLangOpts());
+    if (!RenameRanges) {
+      // Our heuristice fails to adjust rename ranges to the current state of
+      // the file, it is most likely the index is stale, so we give up the
+      // entire rename.
+      return llvm::make_error<llvm::StringError>(
+          llvm::formatv("Index results don't match the content of file {0} "
+                        "(the index may be stale)",
+                        FilePath),
+          llvm::inconvertibleErrorCode());
+    }
     auto RenameEdit =
-        buildRenameEdit(FilePath, *AffectedFileCode,
-                        std::move(FileAndOccurrences.second), NewName);
+        buildRenameEdit(FilePath, *AffectedFileCode, *RenameRanges, NewName);
     if (!RenameEdit) {
       return llvm::make_error<llvm::StringError>(
           llvm::formatv("fail to build rename edit for file {0}: {1}", FilePath,
@@ -370,6 +385,44 @@ llvm::Expected<FileEdits> renameOutsideFile(
   return Results;
 }
 
+// A simple edit is eithor changing line or column, but not both.
+bool impliesSimpleEdit(const Position &LHS, const Position &RHS) {
+  return LHS.line == RHS.line || LHS.character == RHS.character;
+}
+
+// Performs a DFS to enumerate all possible near-miss matches.
+// It finds the locations where the indexed occurrences are now spelled in
+// Lexed occurrences, a near miss is defined as:
+//   - a near miss maps all of the **name** occurrences from the index onto a
+//     *subset* of lexed occurrences (we allow a single name refers to more
+//     than one symbol)
+//   - all indexed occurrences must be mapped, and Result must be distinct and
+//     preseve order (only support detecting simple edits to ensure a
+//     robust mapping)
+//   - each indexed -> lexed occurrences mapping correspondence may change the
+//     *line* or *column*, but not both (increases chance of a robust mapping)
+void findNearMiss(
+    std::vector<size_t> &PartialMatch, ArrayRef<Range> IndexedRest,
+    ArrayRef<Range> LexedRest, int LexedIndex, int &Fuel,
+    llvm::function_ref<void(const std::vector<size_t> &)> MatchedCB) {
+  if (--Fuel < 0)
+    return;
+  if (IndexedRest.size() > LexedRest.size())
+    return;
+  if (IndexedRest.empty()) {
+    MatchedCB(PartialMatch);
+    return;
+  }
+  if (impliesSimpleEdit(IndexedRest.front().start, LexedRest.front().start)) {
+    PartialMatch.push_back(LexedIndex);
+    findNearMiss(PartialMatch, IndexedRest.drop_front(), LexedRest.drop_front(),
+                 LexedIndex + 1, Fuel, MatchedCB);
+    PartialMatch.pop_back();
+  }
+  findNearMiss(PartialMatch, IndexedRest, LexedRest.drop_front(),
+               LexedIndex + 1, Fuel, MatchedCB);
+}
+
 } // namespace
 
 llvm::Expected<FileEdits> rename(const RenameInputs &RInputs) {
@@ -504,5 +557,112 @@ llvm::Expected<Edit> buildRenameEdit(llvm::StringRef AbsFilePath,
   return Edit(InitialCode, std::move(RenameEdit));
 }
 
+// Details:
+//  - lex the draft code to get all rename candidates, this yields a superset
+//    of candidates.
+//  - apply range patching heuristics to generate "authoritative" occurrences,
+//    cases we consider:
+//      (a) index returns a subset of candidates, we use the indexed results.
+//        - fully equal, we are sure the index is up-to-date
+//        - proper subset, index is correct in most cases? there may be false
+//          positives (e.g. candidates got appended), but rename is still safe
+//      (b) index returns non-candidate results, we attempt to map the indexed
+//          ranges onto candidates in a plausible way (e.g. guess that lines
+//          were inserted). If such a "near miss" is found, the rename is still
+//          possible
+llvm::Optional<std::vector<Range>>
+adjustRenameRanges(llvm::StringRef DraftCode, llvm::StringRef Identifier,
+                   std::vector<Range> Indexed, const LangOptions &LangOpts) {
+  assert(!Indexed.empty());
+  std::vector<Range> Lexed =
+      collectIdentifierRanges(Identifier, DraftCode, LangOpts);
+  llvm::sort(Indexed);
+  llvm::sort(Lexed);
+  return getMappedRanges(Indexed, Lexed);
+}
+
+llvm::Optional<std::vector<Range>> getMappedRanges(ArrayRef<Range> Indexed,
+                                                   ArrayRef<Range> Lexed) {
+  assert(!Indexed.empty());
+  assert(std::is_sorted(Indexed.begin(), Indexed.end()));
+  assert(std::is_sorted(Lexed.begin(), Lexed.end()));
+
+  if (Indexed.size() > Lexed.size()) {
+    vlog("The number of lexed occurrences is less than indexed occurrences");
+    return llvm::None;
+  }
+  // Fast check for the special subset case.
+  if (std::includes(Indexed.begin(), Indexed.end(), Lexed.begin(), Lexed.end()))
+    return Indexed.vec();
+
+  std::vector<size_t> Best;
+  size_t BestCost = std::numeric_limits<size_t>::max();
+  bool HasMultiple = 0;
+  std::vector<size_t> ResultStorage;
+  int Fuel = 10000;
+  findNearMiss(ResultStorage, Indexed, Lexed, 0, Fuel,
+               [&](const std::vector<size_t> &Matched) {
+                 size_t MCost =
+                     renameRangeAdjustmentCost(Indexed, Lexed, Matched);
+                 if (MCost < BestCost) {
+                   BestCost = MCost;
+                   Best = std::move(Matched);
+                   HasMultiple = false; // reset
+                   return;
+                 }
+                 if (MCost == BestCost)
+                   HasMultiple = true;
+               });
+  if (HasMultiple) {
+    vlog("The best near miss is not unique.");
+    return llvm::None;
+  }
+  if (Best.empty()) {
+    vlog("Didn't find a near miss.");
+    return llvm::None;
+  }
+  std::vector<Range> Mapped;
+  for (auto I : Best)
+    Mapped.push_back(Lexed[I]);
+  return Mapped;
+}
+
+// The cost is the sum of the implied edit sizes between successive 
diff s, only
+// simple edits are considered:
+//   - insert/remove a line (change line offset)
+//   - insert/remove a character on an existing line (change column offset)
+//
+// Example I, total result is 1 + 1 = 2.
+//   
diff [0]: line + 1 <- insert a line before edit 0.
+//   
diff [1]: line + 1
+//   
diff [2]: line + 1
+//   
diff [3]: line + 2 <- insert a line before edits 2 and 3.
+//
+// Example II, total result is 1 + 1 + 1 = 3.
+//   
diff [0]: line + 1  <- insert a line before edit 0.
+//   
diff [1]: column + 1 <- remove a line between edits 0 and 1, and insert a
+//   character on edit 1.
+size_t renameRangeAdjustmentCost(ArrayRef<Range> Indexed, ArrayRef<Range> Lexed,
+                                 ArrayRef<size_t> MappedIndex) {
+  assert(Indexed.size() == MappedIndex.size());
+  assert(std::is_sorted(Indexed.begin(), Indexed.end()));
+  assert(std::is_sorted(Lexed.begin(), Lexed.end()));
+
+  int LastLine = -1;
+  int LastDLine = 0, LastDColumn = 0;
+  int Cost = 0;
+  for (size_t I = 0; I < Indexed.size(); ++I) {
+    int DLine = Indexed[I].start.line - Lexed[MappedIndex[I]].start.line;
+    int DColumn =
+        Indexed[I].start.character - Lexed[MappedIndex[I]].start.character;
+    int Line = Indexed[I].start.line;
+    if (Line != LastLine)
+      LastDColumn = 0; // colmun offsets don't carry cross lines.
+    Cost += abs(DLine - LastDLine) + abs(DColumn - LastDColumn);
+    std::tie(LastLine, LastDLine, LastDColumn) = std::tie(Line, DLine, DColumn);
+  }
+  return Cost;
+}
+
 } // namespace clangd
 } // namespace clang

diff  --git a/clang-tools-extra/clangd/refactor/Rename.h b/clang-tools-extra/clangd/refactor/Rename.h
index 6f38c14a3e2a..68821fa8a787 100644
--- a/clang-tools-extra/clangd/refactor/Rename.h
+++ b/clang-tools-extra/clangd/refactor/Rename.h
@@ -12,6 +12,7 @@
 #include "Path.h"
 #include "Protocol.h"
 #include "SourceCode.h"
+#include "clang/Basic/LangOptions.h"
 #include "clang/Tooling/Core/Replacement.h"
 #include "llvm/Support/Error.h"
 
@@ -55,6 +56,36 @@ llvm::Expected<Edit> buildRenameEdit(llvm::StringRef AbsFilePath,
                                      std::vector<Range> Occurrences,
                                      llvm::StringRef NewName);
 
+/// Adjusts indexed occurrences to match the current state of the file.
+///
+/// The Index is not always up to date. Blindly editing at the locations
+/// reported by the index may mangle the code in such cases.
+/// This function determines whether the indexed occurrences can be applied to
+/// this file, and heuristically repairs the occurrences if necessary.
+///
+/// The API assumes that Indexed contains only named occurrences (each
+/// occurrence has the same length).
+llvm::Optional<std::vector<Range>>
+adjustRenameRanges(llvm::StringRef DraftCode, llvm::StringRef Identifier,
+                   std::vector<Range> Indexed, const LangOptions &LangOpts);
+
+/// Calculates the lexed occurrences that the given indexed occurrences map to.
+/// Returns None if we don't find a mapping.
+///
+/// Exposed for testing only.
+///
+/// REQUIRED: Indexed and Lexed are sorted.
+llvm::Optional<std::vector<Range>> getMappedRanges(ArrayRef<Range> Indexed,
+                                                   ArrayRef<Range> Lexed);
+/// Evaluates how good the mapped result is. 0 indicates a perfect match.
+///
+/// Exposed for testing only.
+///
+/// REQUIRED: Indexed and Lexed are sorted, Indexed and MappedIndex have the
+/// same size.
+size_t renameRangeAdjustmentCost(ArrayRef<Range> Indexed, ArrayRef<Range> Lexed,
+                                 ArrayRef<size_t> MappedIndex);
+
 } // namespace clangd
 } // namespace clang
 

diff  --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp
index 8a54b552258c..bce73f09fce3 100644
--- a/clang-tools-extra/clangd/unittests/RenameTests.cpp
+++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp
@@ -14,9 +14,11 @@
 #include "index/Ref.h"
 #include "refactor/Rename.h"
 #include "clang/Tooling/Core/Replacement.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include <algorithm>
 
 namespace clang {
 namespace clangd {
@@ -24,7 +26,9 @@ namespace {
 
 using testing::Eq;
 using testing::Pair;
+using testing::IsEmpty;
 using testing::UnorderedElementsAre;
+using testing::UnorderedElementsAreArray;
 
 // Build a RefSlab from all marked ranges in the annotation. The ranges are
 // assumed to associate with the given SymbolName.
@@ -853,6 +857,300 @@ TEST(CrossFileRenameTests, BuildRenameEdits) {
             expectedResult(Code, expectedResult(T, "abc")));
 }
 
+TEST(CrossFileRenameTests, adjustRenameRanges) {
+  // Ranges in IndexedCode indicate the indexed occurrences;
+  // ranges in DraftCode indicate the expected mapped result, empty indicates
+  // we expect no matched result found.
+  struct {
+    llvm::StringRef IndexedCode;
+    llvm::StringRef DraftCode;
+  } Tests[] = {
+    {
+      // both line and column are changed, not a near miss.
+      R"cpp(
+        int [[x]] = 0;
+      )cpp",
+      R"cpp(
+        // insert a line.
+        double x = 0;
+      )cpp",
+    },
+    {
+      // subset.
+      R"cpp(
+        int [[x]] = 0;
+      )cpp",
+      R"cpp(
+        int [[x]] = 0;
+        {int x = 0; }
+      )cpp",
+    },
+    {
+      // shift columns.
+      R"cpp(int [[x]] = 0; void foo(int x);)cpp",
+      R"cpp(double [[x]] = 0; void foo(double x);)cpp",
+    },
+    {
+      // shift lines.
+      R"cpp(
+        int [[x]] = 0;
+        void foo(int x);
+      )cpp",
+      R"cpp(
+        // insert a line.
+        int [[x]] = 0;
+        void foo(int x);
+      )cpp",
+    },
+  };
+  LangOptions LangOpts;
+  LangOpts.CPlusPlus = true;
+  for (const auto &T : Tests) {
+    Annotations Draft(T.DraftCode);
+    auto ActualRanges = adjustRenameRanges(
+        Draft.code(), "x", Annotations(T.IndexedCode).ranges(), LangOpts);
+    if (!ActualRanges)
+       EXPECT_THAT(Draft.ranges(), testing::IsEmpty());
+    else
+      EXPECT_THAT(Draft.ranges(),
+                  testing::UnorderedElementsAreArray(*ActualRanges))
+          << T.DraftCode;
+  }
+}
+
+TEST(RangePatchingHeuristic, GetMappedRanges) {
+  // ^ in LexedCode marks the ranges we expect to be mapped; no ^ indicates
+  // there are no mapped ranges.
+  struct {
+    llvm::StringRef IndexedCode;
+    llvm::StringRef LexedCode;
+  } Tests[] = {
+    {
+      // no lexed ranges.
+      "[[]]",
+      "",
+    },
+    {
+      // both line and column are changed, not a near miss.
+      R"([[]])",
+      R"(
+        [[]]
+      )",
+    },
+    {
+      // subset.
+      "[[]]",
+      "^[[]]  [[]]"
+    },
+    {
+      // shift columns.
+      "[[]]   [[]]",
+      "  ^[[]]   ^[[]]  [[]]"
+    },
+    {
+      R"(
+        [[]]
+
+        [[]] [[]]
+      )",
+      R"(
+        // insert a line
+        ^[[]]
+
+        ^[[]] ^[[]]
+      )",
+    },
+    {
+      R"(
+        [[]]
+
+        [[]] [[]]
+      )",
+      R"(
+        // insert a line
+        ^[[]]
+          ^[[]]  ^[[]] // column is shifted.
+      )",
+    },
+    {
+      R"(
+        [[]]
+
+        [[]] [[]]
+      )",
+      R"(
+        // insert a line
+        [[]]
+
+          [[]]  [[]] // not mapped (both line and column are changed).
+      )",
+    },
+    {
+      R"(
+        [[]]
+                [[]]
+
+                   [[]]
+                  [[]]
+
+        }
+      )",
+      R"(
+        // insert a new line
+        ^[[]]
+                ^[[]]
+             [[]] // additional range
+                   ^[[]]
+                  ^[[]]
+            [[]] // additional range
+      )",
+    },
+    {
+      // non-distinct result (two best results), not a near miss
+      R"(
+        [[]]
+            [[]]
+            [[]]
+      )",
+      R"(
+        [[]]
+        [[]]
+            [[]]
+            [[]]
+      )",
+    }
+  };
+  for (const auto &T : Tests) {
+    auto Lexed = Annotations(T.LexedCode);
+    auto LexedRanges = Lexed.ranges();
+    std::vector<Range> ExpectedMatches;
+    for (auto P : Lexed.points()) {
+      auto Match = llvm::find_if(LexedRanges, [&P](const Range& R) {
+        return R.start == P;
+      });
+      ASSERT_NE(Match, LexedRanges.end());
+      ExpectedMatches.push_back(*Match);
+    }
+
+    auto Mapped =
+        getMappedRanges(Annotations(T.IndexedCode).ranges(), LexedRanges);
+    if (!Mapped)
+      EXPECT_THAT(ExpectedMatches, IsEmpty());
+    else
+      EXPECT_THAT(ExpectedMatches, UnorderedElementsAreArray(*Mapped))
+          << T.IndexedCode;
+  }
+}
+
+TEST(CrossFileRenameTests, adjustmentCost) {
+  struct {
+    llvm::StringRef RangeCode;
+    size_t ExpectedCost;
+  } Tests[] = {
+    {
+      R"(
+        $idx[[]]$lex[[]] // 
diff : 0
+      )",
+      0,
+    },
+    {
+      R"(
+        $idx[[]]
+        $lex[[]] // line 
diff : +1
+                       $idx[[]]
+                       $lex[[]] // line 
diff : +1
+        $idx[[]]
+        $lex[[]] // line 
diff : +1
+
+          $idx[[]]
+
+          $lex[[]] // line 
diff : +2
+      )",
+      1 + 1
+    },
+    {
+       R"(
+        $idx[[]]
+        $lex[[]] // line 
diff : +1
+                       $idx[[]]
+
+                       $lex[[]] // line 
diff : +2
+        $idx[[]]
+
+
+        $lex[[]] // line 
diff : +3
+      )",
+      1 + 1 + 1
+    },
+    {
+       R"(
+        $idx[[]]
+
+
+        $lex[[]] // line 
diff : +3
+                       $idx[[]]
+
+                       $lex[[]] // line 
diff : +2
+        $idx[[]]
+        $lex[[]] // line 
diff : +1
+      )",
+      3 + 1 + 1
+    },
+    {
+      R"(
+        $idx[[]]
+        $lex[[]] // line 
diff : +1
+                       $lex[[]] // line 
diff : -2
+
+                       $idx[[]]
+        $idx[[]]
+
+
+        $lex[[]] // line 
diff : +3
+      )",
+      1 + 3 + 5
+    },
+    {
+      R"(
+                       $idx[[]] $lex[[]] // column 
diff : +1
+        $idx[[]]$lex[[]] // 
diff : 0
+      )",
+      1
+    },
+    {
+      R"(
+        $idx[[]]
+        $lex[[]] // 
diff : +1
+                       $idx[[]] $lex[[]] // column 
diff : +1
+        $idx[[]]$lex[[]] // 
diff : 0
+      )",
+      1 + 1 + 1
+    },
+    {
+      R"(
+        $idx[[]] $lex[[]] // column 
diff : +1
+      )",
+      1
+    },
+    {
+      R"(
+        // column 
diff s: +1, +2, +3
+        $idx[[]] $lex[[]] $idx[[]]  $lex[[]] $idx[[]]   $lex[[]]
+      )",
+      1 + 1 + 1,
+    },
+  };
+  for (const auto &T : Tests) {
+    Annotations C(T.RangeCode);
+    std::vector<size_t> MappedIndex;
+    for (size_t I = 0; I < C.ranges("lex").size(); ++I)
+      MappedIndex.push_back(I);
+    EXPECT_EQ(renameRangeAdjustmentCost(C.ranges("idx"), C.ranges("lex"),
+                                        MappedIndex),
+              T.ExpectedCost) << T.RangeCode;
+  }
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang