[clang-tools-extra] 333620d - [clangd] Support multiline semantic tokens

Wed Jun 29 04:49:30 PDT 2022

Author: Kadir Cetinkaya
Date: 2022-06-29T13:49:03+02:00
New Revision: 333620d37a26949e9f66c823425cf9a2065e3890

URL: https://github.com/llvm/llvm-project/commit/333620d37a26949e9f66c823425cf9a2065e3890
DIFF: https://github.com/llvm/llvm-project/commit/333620d37a26949e9f66c823425cf9a2065e3890.diff

LOG: [clangd] Support multiline semantic tokens

Per LSP, multiline tokens should be handled as if they end at the end
of the line starting the token (there's also a capability to enable them, but
that's an adventure for a different day).

Fixes https://github.com/clangd/clangd/issues/1145

Differential Revision: https://reviews.llvm.org/D127856

Added: 
    

Modified: 
    clang-tools-extra/clangd/ClangdLSPServer.cpp
    clang-tools-extra/clangd/SemanticHighlighting.cpp
    clang-tools-extra/clangd/SemanticHighlighting.h
    clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/clangd/ClangdLSPServer.cpp b/clang-tools-extra/clangd/ClangdLSPServer.cpp
index edafb40ff2b79..54e6765be315b 100644

--- a/clang-tools-extra/clangd/ClangdLSPServer.cpp
+++ b/clang-tools-extra/clangd/ClangdLSPServer.cpp
@@ -1397,14 +1397,15 @@ static void increment(std::string &S) {
 
 void ClangdLSPServer::onSemanticTokens(const SemanticTokensParams &Params,
                                        Callback<SemanticTokens> CB) {
+  auto File = Params.textDocument.uri.file();
   Server->semanticHighlights(
       Params.textDocument.uri.file(),
-      [this, File(Params.textDocument.uri.file().str()), CB(std::move(CB))](
+      [this, File(File.str()), CB(std::move(CB)), Code(Server->getDraft(File))](
           llvm::Expected<std::vector<HighlightingToken>> HT) mutable {
         if (!HT)
           return CB(HT.takeError());
         SemanticTokens Result;
-        Result.tokens = toSemanticTokens(*HT);
+        Result.tokens = toSemanticTokens(*HT, *Code);
         {
           std::lock_guard<std::mutex> Lock(SemanticTokensMutex);
           auto &Last = LastSemanticTokens[File];
@@ -1420,14 +1421,15 @@ void ClangdLSPServer::onSemanticTokens(const SemanticTokensParams &Params,
 void ClangdLSPServer::onSemanticTokensDelta(
     const SemanticTokensDeltaParams &Params,
     Callback<SemanticTokensOrDelta> CB) {
+  auto File = Params.textDocument.uri.file();
   Server->semanticHighlights(
       Params.textDocument.uri.file(),
-      [this, PrevResultID(Params.previousResultId),
-       File(Params.textDocument.uri.file().str()), CB(std::move(CB))](
+      [this, PrevResultID(Params.previousResultId), File(File.str()),
+       CB(std::move(CB)), Code(Server->getDraft(File))](
           llvm::Expected<std::vector<HighlightingToken>> HT) mutable {
         if (!HT)
           return CB(HT.takeError());
-        std::vector<SemanticToken> Toks = toSemanticTokens(*HT);
+        std::vector<SemanticToken> Toks = toSemanticTokens(*HT, *Code);
 
         SemanticTokensOrDelta Result;
         {

diff  --git a/clang-tools-extra/clangd/SemanticHighlighting.cpp b/clang-tools-extra/clangd/SemanticHighlighting.cpp
index 489bb93856a04..2ab7461eee9c6 100644
--- a/clang-tools-extra/clangd/SemanticHighlighting.cpp
+++ b/clang-tools-extra/clangd/SemanticHighlighting.cpp
@@ -30,7 +30,9 @@
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
 #include <algorithm>
 
 namespace clang {
@@ -918,33 +920,69 @@ bool operator<(const HighlightingToken &L, const HighlightingToken &R) {
 }
 
 std::vector<SemanticToken>
-toSemanticTokens(llvm::ArrayRef<HighlightingToken> Tokens) {
+toSemanticTokens(llvm::ArrayRef<HighlightingToken> Tokens,
+                 llvm::StringRef Code) {
   assert(std::is_sorted(Tokens.begin(), Tokens.end()));
   std::vector<SemanticToken> Result;
+  // In case we split a HighlightingToken into multiple tokens (e.g. because it
+  // was spanning multiple lines), this tracks the last one. This prevents
+  // having a copy all the time.
+  HighlightingToken Scratch;
   const HighlightingToken *Last = nullptr;
   for (const HighlightingToken &Tok : Tokens) {
     Result.emplace_back();
-    SemanticToken &Out = Result.back();
+    SemanticToken *Out = &Result.back();
     // deltaStart/deltaLine are relative if possible.
     if (Last) {
-      assert(Tok.R.start.line >= Last->R.start.line);
-      Out.deltaLine = Tok.R.start.line - Last->R.start.line;
-      if (Out.deltaLine == 0) {
+      assert(Tok.R.start.line >= Last->R.end.line);
+      Out->deltaLine = Tok.R.start.line - Last->R.end.line;
+      if (Out->deltaLine == 0) {
         assert(Tok.R.start.character >= Last->R.start.character);
-        Out.deltaStart = Tok.R.start.character - Last->R.start.character;
+        Out->deltaStart = Tok.R.start.character - Last->R.start.character;
       } else {
-        Out.deltaStart = Tok.R.start.character;
+        Out->deltaStart = Tok.R.start.character;
       }
     } else {
-      Out.deltaLine = Tok.R.start.line;
-      Out.deltaStart = Tok.R.start.character;
+      Out->deltaLine = Tok.R.start.line;
+      Out->deltaStart = Tok.R.start.character;
     }
-    assert(Tok.R.end.line == Tok.R.start.line);
-    Out.length = Tok.R.end.character - Tok.R.start.character;
-    Out.tokenType = static_cast<unsigned>(Tok.Kind);
-    Out.tokenModifiers = Tok.Modifiers;
-
+    Out->tokenType = static_cast<unsigned>(Tok.Kind);
+    Out->tokenModifiers = Tok.Modifiers;
     Last = &Tok;
+
+    if (Tok.R.end.line == Tok.R.start.line) {
+      Out->length = Tok.R.end.character - Tok.R.start.character;
+    } else {
+      // If the token spans a line break, split it into multiple pieces for each
+      // line.
+      // This is slow, but multiline tokens are rare.
+      // FIXME: There's a client capability for supporting multiline tokens,
+      // respect that.
+      auto TokStartOffset = llvm::cantFail(positionToOffset(Code, Tok.R.start));
+      // Note that the loop doesn't cover the last line, which has a special
+      // length.
+      for (int I = Tok.R.start.line; I < Tok.R.end.line; ++I) {
+        auto LineEnd = Code.find('\n', TokStartOffset);
+        assert(LineEnd != Code.npos);
+        Out->length = LineEnd - TokStartOffset;
+        // Token continues on next line, right after the line break.
+        TokStartOffset = LineEnd + 1;
+        Result.emplace_back();
+        Out = &Result.back();
+        *Out = Result[Result.size() - 2];
+        // New token starts at the first column of the next line.
+        Out->deltaLine = 1;
+        Out->deltaStart = 0;
+      }
+      // This is the token on last line.
+      Out->length = Tok.R.end.character;
+      // Update the start location for last token, as that's used in the
+      // relative delta calculation for following tokens.
+      Scratch = *Last;
+      Scratch.R.start.line = Tok.R.end.line;
+      Scratch.R.start.character = 0;
+      Last = &Scratch;
+    }
   }
   return Result;
 }

diff  --git a/clang-tools-extra/clangd/SemanticHighlighting.h b/clang-tools-extra/clangd/SemanticHighlighting.h
index b44aa505b4ba3..17863fce5ae87 100644
--- a/clang-tools-extra/clangd/SemanticHighlighting.h
+++ b/clang-tools-extra/clangd/SemanticHighlighting.h
@@ -21,6 +21,7 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SEMANTICHIGHLIGHTING_H
 
 #include "Protocol.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/raw_ostream.h"
 
 namespace clang {
@@ -101,7 +102,8 @@ bool operator<(const HighlightingToken &L, const HighlightingToken &R);
 // main AST.
 std::vector<HighlightingToken> getSemanticHighlightings(ParsedAST &AST);
 
-std::vector<SemanticToken> toSemanticTokens(llvm::ArrayRef<HighlightingToken>);
+std::vector<SemanticToken> toSemanticTokens(llvm::ArrayRef<HighlightingToken>,
+                                            llvm::StringRef Code);
 llvm::StringRef toSemanticTokenType(HighlightingKind Kind);
 llvm::StringRef toSemanticTokenModifier(HighlightingModifier Modifier);
 std::vector<SemanticTokensEdit> diffTokens(llvm::ArrayRef<SemanticToken> Before,

diff  --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
index f60c62500c7de..b877da9755a3a 100644
--- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
@@ -944,7 +944,7 @@ TEST(SemanticHighlighting, toSemanticTokens) {
   )");
   Tokens.front().Modifiers |= unsigned(HighlightingModifier::Declaration);
   Tokens.front().Modifiers |= unsigned(HighlightingModifier::Readonly);
-  auto Results = toSemanticTokens(Tokens);
+  auto Results = toSemanticTokens(Tokens, /*Code=*/"");
 
   ASSERT_THAT(Results, SizeIs(3));
   EXPECT_EQ(Results[0].tokenType, unsigned(HighlightingKind::Variable));
@@ -972,13 +972,15 @@ TEST(SemanticHighlighting, diffSemanticTokens) {
   auto Before = toSemanticTokens(tokens(R"(
     [[foo]] [[bar]] [[baz]]
     [[one]] [[two]] [[three]]
-  )"));
+  )"),
+                                 /*Code=*/"");
   EXPECT_THAT(diffTokens(Before, Before), IsEmpty());
 
   auto After = toSemanticTokens(tokens(R"(
     [[foo]] [[hello]] [[world]] [[baz]]
     [[one]] [[two]] [[three]]
-  )"));
+  )"),
+                                /*Code=*/"");
 
   // Replace [bar, baz] with [hello, world, baz]
   auto Diff = diffTokens(Before, After);
@@ -1000,6 +1002,30 @@ TEST(SemanticHighlighting, diffSemanticTokens) {
   EXPECT_EQ(3u, Diff.front().tokens[2].length);
 }
 
+TEST(SemanticHighlighting, MultilineTokens) {
+  llvm::StringRef AnnotatedCode = R"cpp(
+  [[fo
+o
+o]] [[bar]])cpp";
+  auto Toks = toSemanticTokens(tokens(AnnotatedCode),
+                               Annotations(AnnotatedCode).code());
+  ASSERT_THAT(Toks, SizeIs(4));
+  // foo
+  EXPECT_EQ(Toks[0].deltaLine, 1u);
+  EXPECT_EQ(Toks[0].deltaStart, 2u);
+  EXPECT_EQ(Toks[0].length, 2u);
+  EXPECT_EQ(Toks[1].deltaLine, 1u);
+  EXPECT_EQ(Toks[1].deltaStart, 0u);
+  EXPECT_EQ(Toks[1].length, 1u);
+  EXPECT_EQ(Toks[2].deltaLine, 1u);
+  EXPECT_EQ(Toks[2].deltaStart, 0u);
+  EXPECT_EQ(Toks[2].length, 1u);
+
+  // bar
+  EXPECT_EQ(Toks[3].deltaLine, 0u);
+  EXPECT_EQ(Toks[3].deltaStart, 2u);
+  EXPECT_EQ(Toks[3].length, 3u);
+}
 } // namespace
 } // namespace clangd
 } // namespace clang