[PATCH] D88567: [clangd] Fix invalid UTF8 when extracting doc comments.

Sam McCall via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Wed Sep 30 06:45:26 PDT 2020


sammccall created this revision.
sammccall added a reviewer: kadircet.
Herald added subscribers: cfe-commits, usaxena95, arphaman.
Herald added a project: clang.
sammccall requested review of this revision.
Herald added subscribers: MaskRay, ilya-biryukov.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D88567

Files:
  clang-tools-extra/clangd/CodeCompletionStrings.cpp
  clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp


Index: clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
+++ clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeCompletionStrings.h"
+#include "TestTU.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
@@ -56,6 +57,14 @@
             "Annotation: Ano\n\nIs this brief?");
 }
 
+TEST_F(CompletionStringTest, GetDeclCommentBadUTF8) {
+  // <ff> is not a valid byte here, should be replaced by encoded <U+FFFD>.
+  auto TU = TestTU::withCode("/*x\xffy*/ struct X;");
+  auto AST = TU.build();
+  EXPECT_EQ("x\xef\xbf\xbdy",
+            getDeclComment(AST.getASTContext(), findDecl(AST, "X")));
+}
+
 TEST_F(CompletionStringTest, MultipleAnnotations) {
   Builder.AddAnnotation("Ano1");
   Builder.AddAnnotation("Ano2");
Index: clang-tools-extra/clangd/CodeCompletionStrings.cpp
===================================================================
--- clang-tools-extra/clangd/CodeCompletionStrings.cpp
+++ clang-tools-extra/clangd/CodeCompletionStrings.cpp
@@ -12,6 +12,7 @@
 #include "clang/AST/RawCommentList.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Sema/CodeCompleteConsumer.h"
+#include "llvm/Support/JSON.h"
 #include <limits>
 #include <utility>
 
@@ -86,7 +87,12 @@
   assert(!Ctx.getSourceManager().isLoadedSourceLocation(RC->getBeginLoc()));
   std::string Doc =
       RC->getFormattedText(Ctx.getSourceManager(), Ctx.getDiagnostics());
-  return looksLikeDocComment(Doc) ? Doc : "";
+  if (!looksLikeDocComment(Doc))
+    return "";
+  // Clang requires source to be UTF-8, but doesn't enforce this in comments.
+  if (!llvm::json::isUTF8(Doc))
+    Doc = llvm::json::fixUTF8(Doc);
+  return Doc;
 }
 
 void getSignature(const CodeCompletionString &CCS, std::string *Signature,


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D88567.295265.patch
Type: text/x-patch
Size: 2039 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20200930/9c5df2a7/attachment.bin>


More information about the cfe-commits mailing list