[clang-tools-extra] 891f822 - [clangd] Implement range patching heuristics for cross-file rename.
Haojian Wu via cfe-commits
cfe-commits at lists.llvm.org
Mon Dec 9 08:02:52 PST 2019
Author: Haojian Wu
Date: 2019-12-09T17:01:05+01:00
New Revision: 891f82222bb8436bfd8db0acfbd5f3621fa53425
URL: https://github.com/llvm/llvm-project/commit/891f82222bb8436bfd8db0acfbd5f3621fa53425
DIFF: https://github.com/llvm/llvm-project/commit/891f82222bb8436bfd8db0acfbd5f3621fa53425.diff
LOG: [clangd] Implement range patching heuristics for cross-file rename.
Reviewers: sammccall, ilya-biryukov
Reviewed By: sammccall
Subscribers: merge_guards_bot, MaskRay, jkorous, mgrang, arphaman, kadircet, usaxena95, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D70594
Added:
Modified:
clang-tools-extra/clangd/refactor/Rename.cpp
clang-tools-extra/clangd/refactor/Rename.h
clang-tools-extra/clangd/unittests/RenameTests.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp
index 3f3c216c5909..103f59bc307c 100644
--- a/clang-tools-extra/clangd/refactor/Rename.cpp
+++ b/clang-tools-extra/clangd/refactor/Rename.cpp
@@ -18,9 +18,11 @@
#include "clang/AST/DeclTemplate.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Tooling/Refactoring/Rename/USRFindingAction.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
+#include <algorithm>
namespace clang {
namespace clangd {
@@ -355,9 +357,22 @@ llvm::Expected<FileEdits> renameOutsideFile(
elog("Fail to read file content: {0}", AffectedFileCode.takeError());
continue;
}
+ auto RenameRanges =
+ adjustRenameRanges(*AffectedFileCode, RenameDecl.getNameAsString(),
+ std::move(FileAndOccurrences.second),
+ RenameDecl.getASTContext().getLangOpts());
+ if (!RenameRanges) {
+ // Our heuristice fails to adjust rename ranges to the current state of
+ // the file, it is most likely the index is stale, so we give up the
+ // entire rename.
+ return llvm::make_error<llvm::StringError>(
+ llvm::formatv("Index results don't match the content of file {0} "
+ "(the index may be stale)",
+ FilePath),
+ llvm::inconvertibleErrorCode());
+ }
auto RenameEdit =
- buildRenameEdit(FilePath, *AffectedFileCode,
- std::move(FileAndOccurrences.second), NewName);
+ buildRenameEdit(FilePath, *AffectedFileCode, *RenameRanges, NewName);
if (!RenameEdit) {
return llvm::make_error<llvm::StringError>(
llvm::formatv("fail to build rename edit for file {0}: {1}", FilePath,
@@ -370,6 +385,44 @@ llvm::Expected<FileEdits> renameOutsideFile(
return Results;
}
+// A simple edit is eithor changing line or column, but not both.
+bool impliesSimpleEdit(const Position &LHS, const Position &RHS) {
+ return LHS.line == RHS.line || LHS.character == RHS.character;
+}
+
+// Performs a DFS to enumerate all possible near-miss matches.
+// It finds the locations where the indexed occurrences are now spelled in
+// Lexed occurrences, a near miss is defined as:
+// - a near miss maps all of the **name** occurrences from the index onto a
+// *subset* of lexed occurrences (we allow a single name refers to more
+// than one symbol)
+// - all indexed occurrences must be mapped, and Result must be distinct and
+// preseve order (only support detecting simple edits to ensure a
+// robust mapping)
+// - each indexed -> lexed occurrences mapping correspondence may change the
+// *line* or *column*, but not both (increases chance of a robust mapping)
+void findNearMiss(
+ std::vector<size_t> &PartialMatch, ArrayRef<Range> IndexedRest,
+ ArrayRef<Range> LexedRest, int LexedIndex, int &Fuel,
+ llvm::function_ref<void(const std::vector<size_t> &)> MatchedCB) {
+ if (--Fuel < 0)
+ return;
+ if (IndexedRest.size() > LexedRest.size())
+ return;
+ if (IndexedRest.empty()) {
+ MatchedCB(PartialMatch);
+ return;
+ }
+ if (impliesSimpleEdit(IndexedRest.front().start, LexedRest.front().start)) {
+ PartialMatch.push_back(LexedIndex);
+ findNearMiss(PartialMatch, IndexedRest.drop_front(), LexedRest.drop_front(),
+ LexedIndex + 1, Fuel, MatchedCB);
+ PartialMatch.pop_back();
+ }
+ findNearMiss(PartialMatch, IndexedRest, LexedRest.drop_front(),
+ LexedIndex + 1, Fuel, MatchedCB);
+}
+
} // namespace
llvm::Expected<FileEdits> rename(const RenameInputs &RInputs) {
@@ -504,5 +557,112 @@ llvm::Expected<Edit> buildRenameEdit(llvm::StringRef AbsFilePath,
return Edit(InitialCode, std::move(RenameEdit));
}
+// Details:
+// - lex the draft code to get all rename candidates, this yields a superset
+// of candidates.
+// - apply range patching heuristics to generate "authoritative" occurrences,
+// cases we consider:
+// (a) index returns a subset of candidates, we use the indexed results.
+// - fully equal, we are sure the index is up-to-date
+// - proper subset, index is correct in most cases? there may be false
+// positives (e.g. candidates got appended), but rename is still safe
+// (b) index returns non-candidate results, we attempt to map the indexed
+// ranges onto candidates in a plausible way (e.g. guess that lines
+// were inserted). If such a "near miss" is found, the rename is still
+// possible
+llvm::Optional<std::vector<Range>>
+adjustRenameRanges(llvm::StringRef DraftCode, llvm::StringRef Identifier,
+ std::vector<Range> Indexed, const LangOptions &LangOpts) {
+ assert(!Indexed.empty());
+ std::vector<Range> Lexed =
+ collectIdentifierRanges(Identifier, DraftCode, LangOpts);
+ llvm::sort(Indexed);
+ llvm::sort(Lexed);
+ return getMappedRanges(Indexed, Lexed);
+}
+
+llvm::Optional<std::vector<Range>> getMappedRanges(ArrayRef<Range> Indexed,
+ ArrayRef<Range> Lexed) {
+ assert(!Indexed.empty());
+ assert(std::is_sorted(Indexed.begin(), Indexed.end()));
+ assert(std::is_sorted(Lexed.begin(), Lexed.end()));
+
+ if (Indexed.size() > Lexed.size()) {
+ vlog("The number of lexed occurrences is less than indexed occurrences");
+ return llvm::None;
+ }
+ // Fast check for the special subset case.
+ if (std::includes(Indexed.begin(), Indexed.end(), Lexed.begin(), Lexed.end()))
+ return Indexed.vec();
+
+ std::vector<size_t> Best;
+ size_t BestCost = std::numeric_limits<size_t>::max();
+ bool HasMultiple = 0;
+ std::vector<size_t> ResultStorage;
+ int Fuel = 10000;
+ findNearMiss(ResultStorage, Indexed, Lexed, 0, Fuel,
+ [&](const std::vector<size_t> &Matched) {
+ size_t MCost =
+ renameRangeAdjustmentCost(Indexed, Lexed, Matched);
+ if (MCost < BestCost) {
+ BestCost = MCost;
+ Best = std::move(Matched);
+ HasMultiple = false; // reset
+ return;
+ }
+ if (MCost == BestCost)
+ HasMultiple = true;
+ });
+ if (HasMultiple) {
+ vlog("The best near miss is not unique.");
+ return llvm::None;
+ }
+ if (Best.empty()) {
+ vlog("Didn't find a near miss.");
+ return llvm::None;
+ }
+ std::vector<Range> Mapped;
+ for (auto I : Best)
+ Mapped.push_back(Lexed[I]);
+ return Mapped;
+}
+
+// The cost is the sum of the implied edit sizes between successive
diff s, only
+// simple edits are considered:
+// - insert/remove a line (change line offset)
+// - insert/remove a character on an existing line (change column offset)
+//
+// Example I, total result is 1 + 1 = 2.
+//
diff [0]: line + 1 <- insert a line before edit 0.
+//
diff [1]: line + 1
+//
diff [2]: line + 1
+//
diff [3]: line + 2 <- insert a line before edits 2 and 3.
+//
+// Example II, total result is 1 + 1 + 1 = 3.
+//
diff [0]: line + 1 <- insert a line before edit 0.
+//
diff [1]: column + 1 <- remove a line between edits 0 and 1, and insert a
+// character on edit 1.
+size_t renameRangeAdjustmentCost(ArrayRef<Range> Indexed, ArrayRef<Range> Lexed,
+ ArrayRef<size_t> MappedIndex) {
+ assert(Indexed.size() == MappedIndex.size());
+ assert(std::is_sorted(Indexed.begin(), Indexed.end()));
+ assert(std::is_sorted(Lexed.begin(), Lexed.end()));
+
+ int LastLine = -1;
+ int LastDLine = 0, LastDColumn = 0;
+ int Cost = 0;
+ for (size_t I = 0; I < Indexed.size(); ++I) {
+ int DLine = Indexed[I].start.line - Lexed[MappedIndex[I]].start.line;
+ int DColumn =
+ Indexed[I].start.character - Lexed[MappedIndex[I]].start.character;
+ int Line = Indexed[I].start.line;
+ if (Line != LastLine)
+ LastDColumn = 0; // colmun offsets don't carry cross lines.
+ Cost += abs(DLine - LastDLine) + abs(DColumn - LastDColumn);
+ std::tie(LastLine, LastDLine, LastDColumn) = std::tie(Line, DLine, DColumn);
+ }
+ return Cost;
+}
+
} // namespace clangd
} // namespace clang
diff --git a/clang-tools-extra/clangd/refactor/Rename.h b/clang-tools-extra/clangd/refactor/Rename.h
index 6f38c14a3e2a..68821fa8a787 100644
--- a/clang-tools-extra/clangd/refactor/Rename.h
+++ b/clang-tools-extra/clangd/refactor/Rename.h
@@ -12,6 +12,7 @@
#include "Path.h"
#include "Protocol.h"
#include "SourceCode.h"
+#include "clang/Basic/LangOptions.h"
#include "clang/Tooling/Core/Replacement.h"
#include "llvm/Support/Error.h"
@@ -55,6 +56,36 @@ llvm::Expected<Edit> buildRenameEdit(llvm::StringRef AbsFilePath,
std::vector<Range> Occurrences,
llvm::StringRef NewName);
+/// Adjusts indexed occurrences to match the current state of the file.
+///
+/// The Index is not always up to date. Blindly editing at the locations
+/// reported by the index may mangle the code in such cases.
+/// This function determines whether the indexed occurrences can be applied to
+/// this file, and heuristically repairs the occurrences if necessary.
+///
+/// The API assumes that Indexed contains only named occurrences (each
+/// occurrence has the same length).
+llvm::Optional<std::vector<Range>>
+adjustRenameRanges(llvm::StringRef DraftCode, llvm::StringRef Identifier,
+ std::vector<Range> Indexed, const LangOptions &LangOpts);
+
+/// Calculates the lexed occurrences that the given indexed occurrences map to.
+/// Returns None if we don't find a mapping.
+///
+/// Exposed for testing only.
+///
+/// REQUIRED: Indexed and Lexed are sorted.
+llvm::Optional<std::vector<Range>> getMappedRanges(ArrayRef<Range> Indexed,
+ ArrayRef<Range> Lexed);
+/// Evaluates how good the mapped result is. 0 indicates a perfect match.
+///
+/// Exposed for testing only.
+///
+/// REQUIRED: Indexed and Lexed are sorted, Indexed and MappedIndex have the
+/// same size.
+size_t renameRangeAdjustmentCost(ArrayRef<Range> Indexed, ArrayRef<Range> Lexed,
+ ArrayRef<size_t> MappedIndex);
+
} // namespace clangd
} // namespace clang
diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp
index 8a54b552258c..bce73f09fce3 100644
--- a/clang-tools-extra/clangd/unittests/RenameTests.cpp
+++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp
@@ -14,9 +14,11 @@
#include "index/Ref.h"
#include "refactor/Rename.h"
#include "clang/Tooling/Core/Replacement.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include <algorithm>
namespace clang {
namespace clangd {
@@ -24,7 +26,9 @@ namespace {
using testing::Eq;
using testing::Pair;
+using testing::IsEmpty;
using testing::UnorderedElementsAre;
+using testing::UnorderedElementsAreArray;
// Build a RefSlab from all marked ranges in the annotation. The ranges are
// assumed to associate with the given SymbolName.
@@ -853,6 +857,300 @@ TEST(CrossFileRenameTests, BuildRenameEdits) {
expectedResult(Code, expectedResult(T, "abc")));
}
+TEST(CrossFileRenameTests, adjustRenameRanges) {
+ // Ranges in IndexedCode indicate the indexed occurrences;
+ // ranges in DraftCode indicate the expected mapped result, empty indicates
+ // we expect no matched result found.
+ struct {
+ llvm::StringRef IndexedCode;
+ llvm::StringRef DraftCode;
+ } Tests[] = {
+ {
+ // both line and column are changed, not a near miss.
+ R"cpp(
+ int [[x]] = 0;
+ )cpp",
+ R"cpp(
+ // insert a line.
+ double x = 0;
+ )cpp",
+ },
+ {
+ // subset.
+ R"cpp(
+ int [[x]] = 0;
+ )cpp",
+ R"cpp(
+ int [[x]] = 0;
+ {int x = 0; }
+ )cpp",
+ },
+ {
+ // shift columns.
+ R"cpp(int [[x]] = 0; void foo(int x);)cpp",
+ R"cpp(double [[x]] = 0; void foo(double x);)cpp",
+ },
+ {
+ // shift lines.
+ R"cpp(
+ int [[x]] = 0;
+ void foo(int x);
+ )cpp",
+ R"cpp(
+ // insert a line.
+ int [[x]] = 0;
+ void foo(int x);
+ )cpp",
+ },
+ };
+ LangOptions LangOpts;
+ LangOpts.CPlusPlus = true;
+ for (const auto &T : Tests) {
+ Annotations Draft(T.DraftCode);
+ auto ActualRanges = adjustRenameRanges(
+ Draft.code(), "x", Annotations(T.IndexedCode).ranges(), LangOpts);
+ if (!ActualRanges)
+ EXPECT_THAT(Draft.ranges(), testing::IsEmpty());
+ else
+ EXPECT_THAT(Draft.ranges(),
+ testing::UnorderedElementsAreArray(*ActualRanges))
+ << T.DraftCode;
+ }
+}
+
+TEST(RangePatchingHeuristic, GetMappedRanges) {
+ // ^ in LexedCode marks the ranges we expect to be mapped; no ^ indicates
+ // there are no mapped ranges.
+ struct {
+ llvm::StringRef IndexedCode;
+ llvm::StringRef LexedCode;
+ } Tests[] = {
+ {
+ // no lexed ranges.
+ "[[]]",
+ "",
+ },
+ {
+ // both line and column are changed, not a near miss.
+ R"([[]])",
+ R"(
+ [[]]
+ )",
+ },
+ {
+ // subset.
+ "[[]]",
+ "^[[]] [[]]"
+ },
+ {
+ // shift columns.
+ "[[]] [[]]",
+ " ^[[]] ^[[]] [[]]"
+ },
+ {
+ R"(
+ [[]]
+
+ [[]] [[]]
+ )",
+ R"(
+ // insert a line
+ ^[[]]
+
+ ^[[]] ^[[]]
+ )",
+ },
+ {
+ R"(
+ [[]]
+
+ [[]] [[]]
+ )",
+ R"(
+ // insert a line
+ ^[[]]
+ ^[[]] ^[[]] // column is shifted.
+ )",
+ },
+ {
+ R"(
+ [[]]
+
+ [[]] [[]]
+ )",
+ R"(
+ // insert a line
+ [[]]
+
+ [[]] [[]] // not mapped (both line and column are changed).
+ )",
+ },
+ {
+ R"(
+ [[]]
+ [[]]
+
+ [[]]
+ [[]]
+
+ }
+ )",
+ R"(
+ // insert a new line
+ ^[[]]
+ ^[[]]
+ [[]] // additional range
+ ^[[]]
+ ^[[]]
+ [[]] // additional range
+ )",
+ },
+ {
+ // non-distinct result (two best results), not a near miss
+ R"(
+ [[]]
+ [[]]
+ [[]]
+ )",
+ R"(
+ [[]]
+ [[]]
+ [[]]
+ [[]]
+ )",
+ }
+ };
+ for (const auto &T : Tests) {
+ auto Lexed = Annotations(T.LexedCode);
+ auto LexedRanges = Lexed.ranges();
+ std::vector<Range> ExpectedMatches;
+ for (auto P : Lexed.points()) {
+ auto Match = llvm::find_if(LexedRanges, [&P](const Range& R) {
+ return R.start == P;
+ });
+ ASSERT_NE(Match, LexedRanges.end());
+ ExpectedMatches.push_back(*Match);
+ }
+
+ auto Mapped =
+ getMappedRanges(Annotations(T.IndexedCode).ranges(), LexedRanges);
+ if (!Mapped)
+ EXPECT_THAT(ExpectedMatches, IsEmpty());
+ else
+ EXPECT_THAT(ExpectedMatches, UnorderedElementsAreArray(*Mapped))
+ << T.IndexedCode;
+ }
+}
+
+TEST(CrossFileRenameTests, adjustmentCost) {
+ struct {
+ llvm::StringRef RangeCode;
+ size_t ExpectedCost;
+ } Tests[] = {
+ {
+ R"(
+ $idx[[]]$lex[[]] //
diff : 0
+ )",
+ 0,
+ },
+ {
+ R"(
+ $idx[[]]
+ $lex[[]] // line
diff : +1
+ $idx[[]]
+ $lex[[]] // line
diff : +1
+ $idx[[]]
+ $lex[[]] // line
diff : +1
+
+ $idx[[]]
+
+ $lex[[]] // line
diff : +2
+ )",
+ 1 + 1
+ },
+ {
+ R"(
+ $idx[[]]
+ $lex[[]] // line
diff : +1
+ $idx[[]]
+
+ $lex[[]] // line
diff : +2
+ $idx[[]]
+
+
+ $lex[[]] // line
diff : +3
+ )",
+ 1 + 1 + 1
+ },
+ {
+ R"(
+ $idx[[]]
+
+
+ $lex[[]] // line
diff : +3
+ $idx[[]]
+
+ $lex[[]] // line
diff : +2
+ $idx[[]]
+ $lex[[]] // line
diff : +1
+ )",
+ 3 + 1 + 1
+ },
+ {
+ R"(
+ $idx[[]]
+ $lex[[]] // line
diff : +1
+ $lex[[]] // line
diff : -2
+
+ $idx[[]]
+ $idx[[]]
+
+
+ $lex[[]] // line
diff : +3
+ )",
+ 1 + 3 + 5
+ },
+ {
+ R"(
+ $idx[[]] $lex[[]] // column
diff : +1
+ $idx[[]]$lex[[]] //
diff : 0
+ )",
+ 1
+ },
+ {
+ R"(
+ $idx[[]]
+ $lex[[]] //
diff : +1
+ $idx[[]] $lex[[]] // column
diff : +1
+ $idx[[]]$lex[[]] //
diff : 0
+ )",
+ 1 + 1 + 1
+ },
+ {
+ R"(
+ $idx[[]] $lex[[]] // column
diff : +1
+ )",
+ 1
+ },
+ {
+ R"(
+ // column
diff s: +1, +2, +3
+ $idx[[]] $lex[[]] $idx[[]] $lex[[]] $idx[[]] $lex[[]]
+ )",
+ 1 + 1 + 1,
+ },
+ };
+ for (const auto &T : Tests) {
+ Annotations C(T.RangeCode);
+ std::vector<size_t> MappedIndex;
+ for (size_t I = 0; I < C.ranges("lex").size(); ++I)
+ MappedIndex.push_back(I);
+ EXPECT_EQ(renameRangeAdjustmentCost(C.ranges("idx"), C.ranges("lex"),
+ MappedIndex),
+ T.ExpectedCost) << T.RangeCode;
+ }
+}
+
} // namespace
} // namespace clangd
} // namespace clang
More information about the cfe-commits
mailing list