[clang] 9f0fa65 - Comment parsing: Don't recognize commands in single-line double quotation

Aaron Puchert via cfe-commits cfe-commits at lists.llvm.org
Fri Jan 14 13:48:25 PST 2022


Author: Aaron Puchert
Date: 2022-01-14T22:46:07+01:00
New Revision: 9f0fa6544012ed8f7b6b3d72fce6535bf4430e40

URL: https://github.com/llvm/llvm-project/commit/9f0fa6544012ed8f7b6b3d72fce6535bf4430e40
DIFF: https://github.com/llvm/llvm-project/commit/9f0fa6544012ed8f7b6b3d72fce6535bf4430e40.diff

LOG: Comment parsing: Don't recognize commands in single-line double quotation

This is consistent with the behavior of Doxygen, and allows users to
write strings with C escapes or document input/output formats containing
special characters (@ or \) without escaping them, which might be
confusing. For example, if a function wants to document its expected
input format as "user@host" it doesn't have to write user\@host instead,
which would look right in the documentation but confusing in the code.
Now users can just use double quotes (which they might do anyway).

This fixes a lot of false positives of -Wdocumentation-unknown-command,
but it could also fix issues with -Wdocumentation if the text triggers
an actual command.

Reviewed By: gribozavr2

Differential Revision: https://reviews.llvm.org/D116190

Added: 
    

Modified: 
    clang/include/clang/AST/CommentLexer.h
    clang/lib/AST/CommentLexer.cpp
    clang/test/Sema/warn-documentation-unknown-command.cpp
    clang/test/Sema/warn-documentation.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/AST/CommentLexer.h b/clang/include/clang/AST/CommentLexer.h
index 94f778501e758..9aa1681cb2c5c 100644
--- a/clang/include/clang/AST/CommentLexer.h
+++ b/clang/include/clang/AST/CommentLexer.h
@@ -320,6 +320,9 @@ class Lexer {
   /// Eat string matching regexp \code \s*\* \endcode.
   void skipLineStartingDecorations();
 
+  /// Skip over pure text.
+  const char *skipTextToken();
+
   /// Lex comment text, including commands if ParseCommands is set to true.
   void lexCommentText(Token &T);
 

diff  --git a/clang/lib/AST/CommentLexer.cpp b/clang/lib/AST/CommentLexer.cpp
index 6e00c2aa7c280..61ce8979f13f5 100644
--- a/clang/lib/AST/CommentLexer.cpp
+++ b/clang/lib/AST/CommentLexer.cpp
@@ -270,6 +270,29 @@ void Lexer::formTokenWithChars(Token &Result, const char *TokEnd,
   BufferPtr = TokEnd;
 }
 
+const char *Lexer::skipTextToken() {
+  const char *TokenPtr = BufferPtr; // The scan starts at BufferPtr.
+  assert(TokenPtr < CommentEnd);
+  StringRef TokStartSymbols = ParseCommands ? "\n\r\\@\"&<" : "\n\r";
+
+again:
+  size_t End =
+      StringRef(TokenPtr, CommentEnd - TokenPtr).find_first_of(TokStartSymbols);
+  if (End == StringRef::npos)
+    return CommentEnd; // No stop character left: text runs to CommentEnd.
+
+  // Doxygen doesn't recognize any commands in a one-line double quotation.
+  // If we don't find an ending quotation mark, we pretend it never began.
+  if (*(TokenPtr + End) == '\"') {
+    TokenPtr += End + 1; // Step past the opening quotation mark.
+    End = StringRef(TokenPtr, CommentEnd - TokenPtr).find_first_of("\n\r\"");
+    if (End != StringRef::npos && *(TokenPtr + End) == '\"')
+      TokenPtr += End + 1; // Closed on the same line: skip the quotation.
+    goto again; // Search again for a stop character, starting at TokenPtr.
+  }
+  return TokenPtr + End; // Address of the stop character that was found.
+}
+
 void Lexer::lexCommentText(Token &T) {
   assert(CommentState == LCS_InsideBCPLComment ||
          CommentState == LCS_InsideCComment);
@@ -290,17 +313,8 @@ void Lexer::lexCommentText(Token &T) {
             skipLineStartingDecorations();
           return;
 
-      default: {
-          StringRef TokStartSymbols = ParseCommands ? "\n\r\\@&<" : "\n\r";
-          size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr)
-                           .find_first_of(TokStartSymbols);
-          if (End != StringRef::npos)
-            TokenPtr += End;
-          else
-            TokenPtr = CommentEnd;
-          formTextToken(T, TokenPtr);
-          return;
-      }
+      default:
+        return formTextToken(T, skipTextToken());
     }
   };
 

diff  --git a/clang/test/Sema/warn-documentation-unknown-command.cpp b/clang/test/Sema/warn-documentation-unknown-command.cpp
index 4328c9682f212..2cb261d627c56 100644
--- a/clang/test/Sema/warn-documentation-unknown-command.cpp
+++ b/clang/test/Sema/warn-documentation-unknown-command.cpp
@@ -9,6 +9,15 @@ int test_unknown_comand_1;
 /// \retur aaa
 int test_unknown_comand_2();
 
+/// We don't recognize commands in double quotes: "\n\t @unknown2".
+int test_unknown_comand_3();
+
+// expected-warning@+2 {{unknown command tag name}}
+// expected-warning@+2 {{unknown command tag name}}
+/// But it has to be a single line: "\unknown3
+/// @unknown4" (Doxygen treats multi-line quotes inconsistently.)
+int test_unknown_comand_4();
+
 // RUN: c-index-test -test-load-source all -Wdocumentation-unknown-command %s > /dev/null 2> %t.err
 // RUN: FileCheck < %t.err -check-prefix=CHECK-RANGE %s
 // CHECK-RANGE: warn-documentation-unknown-command.cpp:5:9:{5:9-5:17}: warning: unknown command tag name

diff  --git a/clang/test/Sema/warn-documentation.cpp b/clang/test/Sema/warn-documentation.cpp
index 7243e791bba60..353c94a47eb6f 100644
--- a/clang/test/Sema/warn-documentation.cpp
+++ b/clang/test/Sema/warn-documentation.cpp
@@ -125,6 +125,16 @@ int test_block_command5(int);
 /// \brief \c Aaa
 int test_block_command6(int);
 
+// We don't recognize commands in double quotes.
+/// "\brief \returns Aaa"
+int test_block_command7(int);
+
+// But only if they're single-line. (Doxygen treats multi-line quotes inconsistently.)
+// expected-warning@+1 {{empty paragraph passed to '\brief' command}}
+/// "\brief
+/// \returns Aaa"
+int test_block_command8(int);
+
 // expected-warning@+5 {{duplicated command '\brief'}} expected-note@+1 {{previous command '\brief' here}}
 /// \brief Aaa
 ///


        


More information about the cfe-commits mailing list