[PATCH] D53527: Fix range length comparison in DraftStore::UpdateDraft when Unicode characters are removed from the document
Sam McCall via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 23 04:54:07 PDT 2018
This revision was automatically updated to reflect the committed changes.
Closed by commit rL345020: Fix range length comparison in DraftStore::UpdateDraft when Unicode characters are removed from the document (authored by sammccall).
Herald added a subscriber: llvm-commits.
Changed prior to commit:
https://reviews.llvm.org/D53527?vs=170603&id=170607#toc
Repository:
rL LLVM
https://reviews.llvm.org/D53527
Files:
clang-tools-extra/trunk/clangd/DraftStore.cpp
clang-tools-extra/trunk/clangd/SourceCode.cpp
clang-tools-extra/trunk/clangd/SourceCode.h
clang-tools-extra/trunk/unittests/clangd/SourceCodeTests.cpp
Index: clang-tools-extra/trunk/unittests/clangd/SourceCodeTests.cpp
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/SourceCodeTests.cpp
+++ clang-tools-extra/trunk/unittests/clangd/SourceCodeTests.cpp
@@ -42,6 +42,16 @@
return range;
}
+TEST(SourceCodeTests, lspLength) {
+ EXPECT_EQ(lspLength(""), 0UL);
+ EXPECT_EQ(lspLength("ascii"), 5UL);
+ // BMP
+ EXPECT_EQ(lspLength("↓"), 1UL);
+ EXPECT_EQ(lspLength("¥"), 1UL);
+ // astral
+ EXPECT_EQ(lspLength("😂"), 2UL);
+}
+
TEST(SourceCodeTests, PositionToOffset) {
// line out of bounds
EXPECT_THAT_EXPECTED(positionToOffset(File, position(-1, 2)), Failed());
Index: clang-tools-extra/trunk/clangd/SourceCode.cpp
===================================================================
--- clang-tools-extra/trunk/clangd/SourceCode.cpp
+++ clang-tools-extra/trunk/clangd/SourceCode.cpp
@@ -67,13 +67,12 @@
return std::min(Result, U8.size());
}
-// Counts the number of UTF-16 code units needed to represent a string.
// Like most strings in clangd, the input is UTF-8 encoded.
-static size_t utf16Len(StringRef U8) {
+size_t lspLength(StringRef Code) {
// A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
// Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx.
size_t Count = 0;
- iterateCodepoints(U8, [&](int U8Len, int U16Len) {
+ iterateCodepoints(Code, [&](int U8Len, int U16Len) {
Count += U16Len;
return false;
});
@@ -123,7 +122,7 @@
size_t StartOfLine = (PrevNL == StringRef::npos) ? 0 : (PrevNL + 1);
Position Pos;
Pos.line = Lines;
- Pos.character = utf16Len(Before.substr(StartOfLine));
+ Pos.character = lspLength(Before.substr(StartOfLine));
return Pos;
}
@@ -139,7 +138,7 @@
if (!Invalid) {
auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1;
auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes);
- P.character = utf16Len(LineSoFar);
+ P.character = lspLength(LineSoFar);
}
return P;
}
Index: clang-tools-extra/trunk/clangd/DraftStore.cpp
===================================================================
--- clang-tools-extra/trunk/clangd/DraftStore.cpp
+++ clang-tools-extra/trunk/clangd/DraftStore.cpp
@@ -77,8 +77,17 @@
End, Start),
llvm::errc::invalid_argument);
- if (Change.rangeLength &&
- (ssize_t)(*EndIndex - *StartIndex) != *Change.rangeLength)
+ // Since the range length between two LSP positions is dependent on the
+ // contents of the buffer we compute the range length between the start and
+ // end position ourselves and compare it to the range length of the LSP
+ // message to verify the buffers of the client and server are in sync.
+
+ // EndIndex and StartIndex are in bytes, but Change.rangeLength is in UTF-16
+ // code units.
+ ssize_t ComputedRangeLength =
+ lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex));
+
+ if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength)
return make_error<StringError>(
formatv("Change's rangeLength ({0}) doesn't match the "
"computed range length ({1}).",
Index: clang-tools-extra/trunk/clangd/SourceCode.h
===================================================================
--- clang-tools-extra/trunk/clangd/SourceCode.h
+++ clang-tools-extra/trunk/clangd/SourceCode.h
@@ -23,6 +23,10 @@
namespace clangd {
+// Counts the number of UTF-16 code units needed to represent a string (LSP
+// specifies string lengths in UTF-16 code units).
+size_t lspLength(StringRef Code);
+
/// Turn a [line, column] pair into an offset in Code.
///
/// If P.character exceeds the line length, returns the offset at end-of-line.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D53527.170607.patch
Type: text/x-patch
Size: 3811 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181023/ed10b054/attachment.bin>
More information about the llvm-commits
mailing list