[clang-tools-extra] 216af81 - [clangd] Fix invalid UTF8 when extracting doc comments.
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Wed Sep 30 07:05:19 PDT 2020
Author: Sam McCall
Date: 2020-09-30T16:05:12+02:00
New Revision: 216af81c39d1cc4e90af7b991d517c4c7acc912e
URL: https://github.com/llvm/llvm-project/commit/216af81c39d1cc4e90af7b991d517c4c7acc912e
DIFF: https://github.com/llvm/llvm-project/commit/216af81c39d1cc4e90af7b991d517c4c7acc912e.diff
LOG: [clangd] Fix invalid UTF8 when extracting doc comments.
Differential Revision: https://reviews.llvm.org/D88567
Added:
Modified:
clang-tools-extra/clangd/CodeCompletionStrings.cpp
clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/clangd/CodeCompletionStrings.cpp b/clang-tools-extra/clangd/CodeCompletionStrings.cpp
index ef44c153425a..d4a3bdafcae0 100644
--- a/clang-tools-extra/clangd/CodeCompletionStrings.cpp
+++ b/clang-tools-extra/clangd/CodeCompletionStrings.cpp
@@ -12,6 +12,7 @@
#include "clang/AST/RawCommentList.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Sema/CodeCompleteConsumer.h"
+#include "llvm/Support/JSON.h"
#include <limits>
#include <utility>
@@ -86,7 +87,12 @@ std::string getDeclComment(const ASTContext &Ctx, const NamedDecl &Decl) {
assert(!Ctx.getSourceManager().isLoadedSourceLocation(RC->getBeginLoc()));
std::string Doc =
RC->getFormattedText(Ctx.getSourceManager(), Ctx.getDiagnostics());
- return looksLikeDocComment(Doc) ? Doc : "";
+ if (!looksLikeDocComment(Doc))
+ return "";
+ // Clang requires source to be UTF-8, but doesn't enforce this in comments.
+ if (!llvm::json::isUTF8(Doc))
+ Doc = llvm::json::fixUTF8(Doc);
+ return Doc;
}
void getSignature(const CodeCompletionString &CCS, std::string *Signature,
diff --git a/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
index 2531922a5ca1..7aace938b70c 100644
--- a/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "CodeCompletionStrings.h"
+#include "TestTU.h"
#include "clang/Sema/CodeCompleteConsumer.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@@ -56,6 +57,14 @@ TEST_F(CompletionStringTest, DocumentationWithAnnotation) {
"Annotation: Ano\n\nIs this brief?");
}
+TEST_F(CompletionStringTest, GetDeclCommentBadUTF8) {
+ // <ff> is not a valid byte here, should be replaced by encoded <U+FFFD>.
+ auto TU = TestTU::withCode("/*x\xffy*/ struct X;");
+ auto AST = TU.build();
+ EXPECT_EQ("x\xef\xbf\xbdy",
+ getDeclComment(AST.getASTContext(), findDecl(AST, "X")));
+}
+
TEST_F(CompletionStringTest, MultipleAnnotations) {
Builder.AddAnnotation("Ano1");
Builder.AddAnnotation("Ano2");
diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
index 3940946d8016..80995baf946f 100644
--- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
@@ -1606,11 +1606,11 @@ TEST_F(SymbolCollectorTest, BadUTF8) {
// Extracted from boost/spirit/home/support/char_encoding/iso8859_1.hpp
// This looks like UTF-8 and fools clang, but has high-ISO-8859-1 comments.
const char *Header = "int PUNCT = 0;\n"
- "int types[] = { /* \xa1 */PUNCT };";
+ "/* \xa1 */ int types[] = { /* \xa1 */PUNCT };";
CollectorOpts.RefFilter = RefKind::All;
CollectorOpts.RefsInHeaders = true;
runSymbolCollector(Header, "");
- EXPECT_THAT(Symbols, Contains(QName("types")));
+ EXPECT_THAT(Symbols, Contains(AllOf(QName("types"), Doc("\xef\xbf\xbd "))));
EXPECT_THAT(Symbols, Contains(QName("PUNCT")));
// Reference is stored, although offset within line is not reliable.
EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "PUNCT").ID, _)));
More information about the cfe-commits mailing list