[PATCH] D53527: Fix range length comparison in DraftStore::UpdateDraft when Unicode characters are removed from the document
Daan De Meyer via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 23 04:17:19 PDT 2018
DaanDeMeyer updated this revision to Diff 170603.
DaanDeMeyer added a comment.
Update diff according to comments.
Repository:
rCTE Clang Tools Extra
https://reviews.llvm.org/D53527
Files:
clangd/DraftStore.cpp
clangd/SourceCode.cpp
clangd/SourceCode.h
unittests/clangd/SourceCodeTests.cpp
Index: unittests/clangd/SourceCodeTests.cpp
===================================================================
--- unittests/clangd/SourceCodeTests.cpp
+++ unittests/clangd/SourceCodeTests.cpp
@@ -42,6 +42,16 @@
return range;
}
+TEST(SourceCodeTests, lspLength) {
+ EXPECT_EQ(lspLength(""), 0UL);
+ EXPECT_EQ(lspLength("ascii"), 5UL);
+ // BMP
+ EXPECT_EQ(lspLength("↓"), 1UL);
+ EXPECT_EQ(lspLength("¥"), 1UL);
+ // astral
+ EXPECT_EQ(lspLength("😂"), 2UL);
+}
+
TEST(SourceCodeTests, PositionToOffset) {
// line out of bounds
EXPECT_THAT_EXPECTED(positionToOffset(File, position(-1, 2)), Failed());
Index: clangd/SourceCode.cpp
===================================================================
--- clangd/SourceCode.cpp
+++ clangd/SourceCode.cpp
@@ -67,13 +67,12 @@
return std::min(Result, U8.size());
}
-// Counts the number of UTF-16 code units needed to represent a string.
// Like most strings in clangd, the input is UTF-8 encoded.
-static size_t utf16Len(StringRef U8) {
+size_t lspLength(StringRef Code) {
// A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
// Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx.
size_t Count = 0;
- iterateCodepoints(U8, [&](int U8Len, int U16Len) {
+ iterateCodepoints(Code, [&](int U8Len, int U16Len) {
Count += U16Len;
return false;
});
@@ -123,7 +122,7 @@
size_t StartOfLine = (PrevNL == StringRef::npos) ? 0 : (PrevNL + 1);
Position Pos;
Pos.line = Lines;
- Pos.character = utf16Len(Before.substr(StartOfLine));
+ Pos.character = lspLength(Before.substr(StartOfLine));
return Pos;
}
@@ -139,7 +138,7 @@
if (!Invalid) {
auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1;
auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes);
- P.character = utf16Len(LineSoFar);
+ P.character = lspLength(LineSoFar);
}
return P;
}
Index: clangd/SourceCode.h
===================================================================
--- clangd/SourceCode.h
+++ clangd/SourceCode.h
@@ -23,6 +23,10 @@
namespace clangd {
+// Counts the number of UTF-16 code units needed to represent a string (LSP
+// specifies string lengths in UTF-16 code units).
+size_t lspLength(StringRef Code);
+
/// Turn a [line, column] pair into an offset in Code.
///
/// If P.character exceeds the line length, returns the offset at end-of-line.
Index: clangd/DraftStore.cpp
===================================================================
--- clangd/DraftStore.cpp
+++ clangd/DraftStore.cpp
@@ -77,8 +77,17 @@
End, Start),
llvm::errc::invalid_argument);
- if (Change.rangeLength &&
- (ssize_t)(*EndIndex - *StartIndex) != *Change.rangeLength)
+ // Since the range length between two LSP positions is dependent on the
+ // contents of the buffer, we compute the range length between the start and
+ // end positions ourselves and compare it to the range length of the LSP
+ // message to verify the buffers of the client and server are in sync.
+
+ // EndIndex and StartIndex are in bytes, but Change.rangeLength is in UTF-16
+ // code units.
+ ssize_t ComputedRangeLength =
+ lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex));
+
+ if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength)
return make_error<StringError>(
formatv("Change's rangeLength ({0}) doesn't match the "
"computed range length ({1}).",
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D53527.170603.patch
Type: text/x-patch
Size: 3523 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20181023/86d75c77/attachment-0001.bin>
More information about the cfe-commits mailing list