[clang] 9f0fa65 - Comment parsing: Don't recognize commands in single-line double quotation
Aaron Puchert via cfe-commits
cfe-commits at lists.llvm.org
Fri Jan 14 13:48:25 PST 2022
Author: Aaron Puchert
Date: 2022-01-14T22:46:07+01:00
New Revision: 9f0fa6544012ed8f7b6b3d72fce6535bf4430e40
URL: https://github.com/llvm/llvm-project/commit/9f0fa6544012ed8f7b6b3d72fce6535bf4430e40
DIFF: https://github.com/llvm/llvm-project/commit/9f0fa6544012ed8f7b6b3d72fce6535bf4430e40.diff
LOG: Comment parsing: Don't recognize commands in single-line double quotation
This is consistent with the behavior of Doxygen, and allows users to
write strings with C escapes or document input/output formats containing
special characters (@ or \) without escaping them, which might be
confusing. For example, if a function wants to document its expected
input format as "user@host" it doesn't have to write user\@host instead,
which would look right in the documentation but confusing in the code.
Now users can just use double quotes (which they might do anyway).
This fixes a lot of false positives of -Wdocumentation-unknown-command,
but it could also fix issues with -Wdocumentation if the text triggers
an actual command.
Reviewed By: gribozavr2
Differential Revision: https://reviews.llvm.org/D116190
Added:
Modified:
clang/include/clang/AST/CommentLexer.h
clang/lib/AST/CommentLexer.cpp
clang/test/Sema/warn-documentation-unknown-command.cpp
clang/test/Sema/warn-documentation.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/AST/CommentLexer.h b/clang/include/clang/AST/CommentLexer.h
index 94f778501e758..9aa1681cb2c5c 100644
--- a/clang/include/clang/AST/CommentLexer.h
+++ b/clang/include/clang/AST/CommentLexer.h
@@ -320,6 +320,9 @@ class Lexer {
/// Eat string matching regexp \code \s*\* \endcode.
void skipLineStartingDecorations();
+ /// Skip over pure text.
+ const char *skipTextToken();
+
/// Lex comment text, including commands if ParseCommands is set to true.
void lexCommentText(Token &T);
diff --git a/clang/lib/AST/CommentLexer.cpp b/clang/lib/AST/CommentLexer.cpp
index 6e00c2aa7c280..61ce8979f13f5 100644
--- a/clang/lib/AST/CommentLexer.cpp
+++ b/clang/lib/AST/CommentLexer.cpp
@@ -270,6 +270,29 @@ void Lexer::formTokenWithChars(Token &Result, const char *TokEnd,
BufferPtr = TokEnd;
}
+const char *Lexer::skipTextToken() {
+ const char *TokenPtr = BufferPtr;
+ assert(TokenPtr < CommentEnd);
+ StringRef TokStartSymbols = ParseCommands ? "\n\r\\@\"&<" : "\n\r";
+
+again:
+ size_t End =
+ StringRef(TokenPtr, CommentEnd - TokenPtr).find_first_of(TokStartSymbols);
+ if (End == StringRef::npos)
+ return CommentEnd;
+
+ // Doxygen doesn't recognize any commands in a one-line double quotation.
+ // If we don't find an ending quotation mark, we pretend it never began.
+ if (*(TokenPtr + End) == '\"') {
+ TokenPtr += End + 1;
+ End = StringRef(TokenPtr, CommentEnd - TokenPtr).find_first_of("\n\r\"");
+ if (End != StringRef::npos && *(TokenPtr + End) == '\"')
+ TokenPtr += End + 1;
+ goto again;
+ }
+ return TokenPtr + End;
+}
+
void Lexer::lexCommentText(Token &T) {
assert(CommentState == LCS_InsideBCPLComment ||
CommentState == LCS_InsideCComment);
@@ -290,17 +313,8 @@ void Lexer::lexCommentText(Token &T) {
skipLineStartingDecorations();
return;
- default: {
- StringRef TokStartSymbols = ParseCommands ? "\n\r\\@&<" : "\n\r";
- size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr)
- .find_first_of(TokStartSymbols);
- if (End != StringRef::npos)
- TokenPtr += End;
- else
- TokenPtr = CommentEnd;
- formTextToken(T, TokenPtr);
- return;
- }
+ default:
+ return formTextToken(T, skipTextToken());
}
};
diff --git a/clang/test/Sema/warn-documentation-unknown-command.cpp b/clang/test/Sema/warn-documentation-unknown-command.cpp
index 4328c9682f212..2cb261d627c56 100644
--- a/clang/test/Sema/warn-documentation-unknown-command.cpp
+++ b/clang/test/Sema/warn-documentation-unknown-command.cpp
@@ -9,6 +9,15 @@ int test_unknown_comand_1;
/// \retur aaa
int test_unknown_comand_2();
+/// We don't recognize commands in double quotes: "\n\t @unknown2".
+int test_unknown_comand_3();
+
+// expected-warning@+2 {{unknown command tag name}}
+// expected-warning@+2 {{unknown command tag name}}
+/// But it has to be a single line: "\unknown3
+/// @unknown4" (Doxygen treats multi-line quotes inconsistently.)
+int test_unknown_comand_4();
+
// RUN: c-index-test -test-load-source all -Wdocumentation-unknown-command %s > /dev/null 2> %t.err
// RUN: FileCheck < %t.err -check-prefix=CHECK-RANGE %s
// CHECK-RANGE: warn-documentation-unknown-command.cpp:5:9:{5:9-5:17}: warning: unknown command tag name
diff --git a/clang/test/Sema/warn-documentation.cpp b/clang/test/Sema/warn-documentation.cpp
index 7243e791bba60..353c94a47eb6f 100644
--- a/clang/test/Sema/warn-documentation.cpp
+++ b/clang/test/Sema/warn-documentation.cpp
@@ -125,6 +125,16 @@ int test_block_command5(int);
/// \brief \c Aaa
int test_block_command6(int);
+// We don't recognize comments in double quotes.
+/// "\brief \returns Aaa"
+int test_block_command7(int);
+
+// But only if they're single-line. (Doxygen treats multi-line quotes inconsistently.)
+// expected-warning@+1 {{empty paragraph passed to '\brief' command}}
+/// "\brief
+/// \returns Aaa"
+int test_block_command8(int);
+
// expected-warning@+5 {{duplicated command '\brief'}} expected-note@+1 {{previous command '\brief' here}}
/// \brief Aaa
///
More information about the cfe-commits
mailing list