[PATCH] D53527: Fix range length comparison in DraftStore::UpdateDraft when Unicode characters are removed from the document

Daan De Meyer via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 23 04:17:19 PDT 2018


DaanDeMeyer updated this revision to Diff 170603.
DaanDeMeyer added a comment.

Updated the diff to address the review comments.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D53527

Files:
  clangd/DraftStore.cpp
  clangd/SourceCode.cpp
  clangd/SourceCode.h
  unittests/clangd/SourceCodeTests.cpp


Index: unittests/clangd/SourceCodeTests.cpp
===================================================================
--- unittests/clangd/SourceCodeTests.cpp
+++ unittests/clangd/SourceCodeTests.cpp
@@ -42,6 +42,16 @@
   return range;
 }
 
+TEST(SourceCodeTests, lspLength) {
+  EXPECT_EQ(lspLength(""), 0UL);
+  EXPECT_EQ(lspLength("ascii"), 5UL);
+  // BMP code points take one UTF-16 code unit each.
+  EXPECT_EQ(lspLength("↓"), 1UL);
+  EXPECT_EQ(lspLength("¥"), 1UL);
+  // Astral code points (outside the BMP) take two UTF-16 code units.
+  EXPECT_EQ(lspLength("😂"), 2UL);
+}
+
 TEST(SourceCodeTests, PositionToOffset) {
   // line out of bounds
   EXPECT_THAT_EXPECTED(positionToOffset(File, position(-1, 2)), Failed());
Index: clangd/SourceCode.cpp
===================================================================
--- clangd/SourceCode.cpp
+++ clangd/SourceCode.cpp
@@ -67,13 +67,12 @@
   return std::min(Result, U8.size());
 }
 
-// Counts the number of UTF-16 code units needed to represent a string.
 // Like most strings in clangd, the input is UTF-8 encoded.
-static size_t utf16Len(StringRef U8) {
+size_t lspLength(StringRef Code) {
   // A codepoint takes two UTF-16 code unit if it's astral (outside BMP).
   // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx.
   size_t Count = 0;
-  iterateCodepoints(U8, [&](int U8Len, int U16Len) {
+  iterateCodepoints(Code, [&](int U8Len, int U16Len) {
     Count += U16Len;
     return false;
   });
@@ -123,7 +122,7 @@
   size_t StartOfLine = (PrevNL == StringRef::npos) ? 0 : (PrevNL + 1);
   Position Pos;
   Pos.line = Lines;
-  Pos.character = utf16Len(Before.substr(StartOfLine));
+  Pos.character = lspLength(Before.substr(StartOfLine));
   return Pos;
 }
 
@@ -139,7 +138,7 @@
   if (!Invalid) {
     auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1;
     auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes);
-    P.character = utf16Len(LineSoFar);
+    P.character = lspLength(LineSoFar);
   }
   return P;
 }
Index: clangd/SourceCode.h
===================================================================
--- clangd/SourceCode.h
+++ clangd/SourceCode.h
@@ -23,6 +23,10 @@
 
 namespace clangd {
 
+// Counts the number of UTF-16 code units needed to represent a string (LSP
+// specifies string lengths in UTF-16 code units).
+size_t lspLength(StringRef Code);
+
 /// Turn a [line, column] pair into an offset in Code.
 ///
 /// If P.character exceeds the line length, returns the offset at end-of-line.
Index: clangd/DraftStore.cpp
===================================================================
--- clangd/DraftStore.cpp
+++ clangd/DraftStore.cpp
@@ -77,8 +77,17 @@
                   End, Start),
           llvm::errc::invalid_argument);
 
-    if (Change.rangeLength &&
-        (ssize_t)(*EndIndex - *StartIndex) != *Change.rangeLength)
+    // Since the range length between two LSP positions is dependent on the
+    // contents of the buffer, we compute the range length between the start and
+    // end position ourselves and compare it to the range length of the LSP
+    // message to verify the buffers of the client and server are in sync.
+
+    // EndIndex and StartIndex are in bytes, but Change.rangeLength is in UTF-16
+    // code units.
+    ssize_t ComputedRangeLength =
+        lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex));
+
+    if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength)
       return make_error<StringError>(
           formatv("Change's rangeLength ({0}) doesn't match the "
                   "computed range length ({1}).",


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D53527.170603.patch
Type: text/x-patch
Size: 3523 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20181023/86d75c77/attachment-0001.bin>


More information about the cfe-commits mailing list