[clang] [Clang][Comments] Support for parsing headers in Doxygen \par commands (PR #91100)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Jun 15 13:53:18 PDT 2024
https://github.com/hdoc updated https://github.com/llvm/llvm-project/pull/91100
>From e7f88b7fb3137a7fc24d082b9ac7765400a96644 Mon Sep 17 00:00:00 2001
From: hdoc <github at hdoc.io>
Date: Sat, 4 May 2024 18:50:16 -0700
Subject: [PATCH 1/5] Support for parsing headers in Doxygen \par commands
---
.../include/clang/AST/CommentCommandTraits.h | 4 +
clang/include/clang/AST/CommentCommands.td | 3 +-
clang/include/clang/AST/CommentParser.h | 4 +-
clang/lib/AST/CommentParser.cpp | 77 ++++++++++
clang/test/Index/comment-misc-tags.m | 8 +-
clang/unittests/AST/CommentParser.cpp | 137 ++++++++++++++++++
.../ClangCommentCommandInfoEmitter.cpp | 1 +
7 files changed, 227 insertions(+), 7 deletions(-)
diff --git a/clang/include/clang/AST/CommentCommandTraits.h b/clang/include/clang/AST/CommentCommandTraits.h
index 0c3254d84eb00..78c484fff3aed 100644
--- a/clang/include/clang/AST/CommentCommandTraits.h
+++ b/clang/include/clang/AST/CommentCommandTraits.h
@@ -88,6 +88,10 @@ struct CommandInfo {
LLVM_PREFERRED_TYPE(bool)
unsigned IsHeaderfileCommand : 1;
+ /// True if this is a \\par command.
+ LLVM_PREFERRED_TYPE(bool)
+ unsigned IsParCommand : 1;
+
/// True if we don't want to warn about this command being passed an empty
/// paragraph. Meaningful only for block commands.
LLVM_PREFERRED_TYPE(bool)
diff --git a/clang/include/clang/AST/CommentCommands.td b/clang/include/clang/AST/CommentCommands.td
index 06b2fa9b5531c..a410cd4039bee 100644
--- a/clang/include/clang/AST/CommentCommands.td
+++ b/clang/include/clang/AST/CommentCommands.td
@@ -18,6 +18,7 @@ class Command<string name> {
bit IsThrowsCommand = 0;
bit IsDeprecatedCommand = 0;
bit IsHeaderfileCommand = 0;
+ bit IsParCommand = 0;
bit IsEmptyParagraphAllowed = 0;
@@ -156,7 +157,7 @@ def Date : BlockCommand<"date">;
def Invariant : BlockCommand<"invariant">;
def Li : BlockCommand<"li">;
def Note : BlockCommand<"note">;
-def Par : BlockCommand<"par">;
+def Par : BlockCommand<"par"> { let IsParCommand = 1; let NumArgs = 1; }
def Post : BlockCommand<"post">;
def Pre : BlockCommand<"pre">;
def Remark : BlockCommand<"remark">;
diff --git a/clang/include/clang/AST/CommentParser.h b/clang/include/clang/AST/CommentParser.h
index a2d0e30835e2c..289f0b2c066b9 100644
--- a/clang/include/clang/AST/CommentParser.h
+++ b/clang/include/clang/AST/CommentParser.h
@@ -105,6 +105,9 @@ class Parser {
ArrayRef<Comment::Argument>
parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);
+ ArrayRef<Comment::Argument>
+ parseParCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);
+
BlockCommandComment *parseBlockCommand();
InlineCommandComment *parseInlineCommand();
@@ -123,4 +126,3 @@ class Parser {
} // end namespace clang
#endif
-
diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index 5baf81a509fb6..bbe93ebc37d13 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -222,6 +222,63 @@ class TextTokenRetokenizer {
return true;
}
+ /// Check if this line starts with @par or \par
+ bool startsWithParCommand() {
+ unsigned Offset = 1;
+
+ /// Skip all whitespace characters at the beginning.
+ /// This needs to backtrack because Pos has already advanced past the
+ /// actual \par or @par command by the time this function is called.
+ while (isWhitespace(*(Pos.BufferPtr - Offset)))
+ Offset++;
+
+ /// Check if next four characters are \par or @par
+ llvm::StringRef LineStart(Pos.BufferPtr - 5, 4);
+ return LineStart.starts_with("\\par") || LineStart.starts_with("@par");
+ }
+
+ /// Extract a par command argument-header.
+ bool lexParHeading(Token &Tok) {
+ if (isEnd())
+ return false;
+
+ Position SavedPos = Pos;
+
+ consumeWhitespace();
+ SmallString<32> WordText;
+ const char *WordBegin = Pos.BufferPtr;
+ SourceLocation Loc = getSourceLocation();
+
+ if (!startsWithParCommand())
+ return false;
+
+ // Read until the end of this token, which is effectively the end of the
+ // line This gets us the content of the par header, if there is one.
+ while (!isEnd()) {
+ WordText.push_back(peek());
+ if (Pos.BufferPtr + 1 == Pos.BufferEnd) {
+ consumeChar();
+ break;
+ } else {
+ consumeChar();
+ }
+ }
+
+ const unsigned Length = WordText.size();
+ if (Length == 0) {
+ Pos = SavedPos;
+ return false;
+ }
+
+ char *TextPtr = Allocator.Allocate<char>(Length + 1);
+
+ memcpy(TextPtr, WordText.c_str(), Length + 1);
+ StringRef Text = StringRef(TextPtr, Length);
+
+ formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
+ return true;
+ }
+
/// Extract a word -- sequence of non-whitespace characters.
bool lexWord(Token &Tok) {
if (isEnd())
@@ -394,6 +451,23 @@ Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
return llvm::ArrayRef(Args, ParsedArgs);
}
+ArrayRef<Comment::Argument>
+Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer,
+ unsigned NumArgs) {
+ auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
+ Comment::Argument[NumArgs];
+ unsigned ParsedArgs = 0;
+ Token Arg;
+
+ while (ParsedArgs < NumArgs && Retokenizer.lexParHeading(Arg)) {
+ Args[ParsedArgs] = Comment::Argument{
+ SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
+ ParsedArgs++;
+ }
+
+ return llvm::ArrayRef(Args, ParsedArgs);
+}
+
BlockCommandComment *Parser::parseBlockCommand() {
assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
@@ -449,6 +523,9 @@ BlockCommandComment *Parser::parseBlockCommand() {
else if (Info->IsThrowsCommand)
S.actOnBlockCommandArgs(
BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs));
+ else if (Info->IsParCommand)
+ S.actOnBlockCommandArgs(BC,
+ parseParCommandArgs(Retokenizer, Info->NumArgs));
else
S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs));
diff --git a/clang/test/Index/comment-misc-tags.m b/clang/test/Index/comment-misc-tags.m
index 47ee9d9aa392a..6d018dbfcf193 100644
--- a/clang/test/Index/comment-misc-tags.m
+++ b/clang/test/Index/comment-misc-tags.m
@@ -91,18 +91,16 @@ @interface IOCommandGate
struct Test {int filler;};
-// CHECK: (CXComment_BlockCommand CommandName=[par]
+// CHECK: (CXComment_BlockCommand CommandName=[par] Arg[0]=User defined paragraph:
// CHECK-NEXT: (CXComment_Paragraph
-// CHECK-NEXT: (CXComment_Text Text=[ User defined paragraph:] HasTrailingNewline)
// CHECK-NEXT: (CXComment_Text Text=[ Contents of the paragraph.])))
// CHECK: (CXComment_BlockCommand CommandName=[par]
// CHECK-NEXT: (CXComment_Paragraph
-// CHECK-NEXT: (CXComment_Text Text=[ New paragraph under the same heading.])))
+// CHECK-NEXT: (CXComment_Text Text=[New paragraph under the same heading.])))
// CHECK: (CXComment_BlockCommand CommandName=[note]
// CHECK-NEXT: (CXComment_Paragraph
// CHECK-NEXT: (CXComment_Text Text=[ This note consists of two paragraphs.] HasTrailingNewline)
// CHECK-NEXT: (CXComment_Text Text=[ This is the first paragraph.])))
// CHECK: (CXComment_BlockCommand CommandName=[par]
// CHECK-NEXT: (CXComment_Paragraph
-// CHECK-NEXT: (CXComment_Text Text=[ And this is the second paragraph.])))
-
+// CHECK-NEXT: (CXComment_Text Text=[And this is the second paragraph.])))
diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp
index 1c57c899f9074..e0df182d430c3 100644
--- a/clang/unittests/AST/CommentParser.cpp
+++ b/clang/unittests/AST/CommentParser.cpp
@@ -1639,6 +1639,143 @@ TEST_F(CommentParserTest, ThrowsCommandHasArg9) {
}
}
+TEST_F(CommentParserTest, ParCommandHasArg1) {
+ const char *Sources[] = {
+ "/// @par Paragraph header:", "/// @par Paragraph header:\n",
+ "/// @par Paragraph header:\r\n", "/// @par Paragraph header:\n\r",
+ "/** @par Paragraph header:*/",
+ };
+
+ for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+ FullComment *FC = parseString(Sources[i]);
+ ASSERT_TRUE(HasChildCount(FC, 2));
+
+ ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+ {
+ BlockCommandComment *BCC;
+ ParagraphComment *PC;
+ ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC));
+ ASSERT_TRUE(HasChildCount(PC, 0));
+ ASSERT_TRUE(BCC->getNumArgs() == 1);
+ ASSERT_TRUE(BCC->getArgText(0) == "Paragraph header:");
+ }
+ }
+}
+
+TEST_F(CommentParserTest, ParCommandHasArg2) {
+ const char *Sources[] = {
+ "/// @par Paragraph header: ", "/// @par Paragraph header: \n",
+ "/// @par Paragraph header: \r\n", "/// @par Paragraph header: \n\r",
+ "/** @par Paragraph header: */",
+ };
+
+ for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+ FullComment *FC = parseString(Sources[i]);
+ ASSERT_TRUE(HasChildCount(FC, 2));
+
+ ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+ {
+ BlockCommandComment *BCC;
+ ParagraphComment *PC;
+ ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC));
+ ASSERT_TRUE(HasChildCount(PC, 0));
+ ASSERT_TRUE(BCC->getNumArgs() == 1);
+ ASSERT_TRUE(BCC->getArgText(0) == "Paragraph header: ");
+ }
+ }
+}
+
+TEST_F(CommentParserTest, ParCommandHasArg3) {
+ const char *Sources[] = {
+ ("/// @par Paragraph header:\n"
+ "/// Paragraph body"),
+ ("/// @par Paragraph header:\r\n"
+ "/// Paragraph body"),
+ ("/// @par Paragraph header:\n\r"
+ "/// Paragraph body"),
+ };
+
+ for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+ FullComment *FC = parseString(Sources[i]);
+ ASSERT_TRUE(HasChildCount(FC, 2));
+
+ ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+ {
+ BlockCommandComment *BCC;
+ ParagraphComment *PC;
+ TextComment *TC;
+ ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC));
+ ASSERT_TRUE(HasChildCount(PC, 1));
+ ASSERT_TRUE(BCC->getNumArgs() == 1);
+ ASSERT_TRUE(BCC->getArgText(0) == "Paragraph header:");
+ ASSERT_TRUE(GetChildAt(PC, 0, TC));
+ ASSERT_TRUE(TC->getText() == " Paragraph body");
+ }
+ }
+}
+
+TEST_F(CommentParserTest, ParCommandHasArg4) {
+ const char *Sources[] = {
+ ("/// @par Paragraph header:\n"
+ "/// Paragraph body1\n"
+ "/// Paragraph body2"),
+ ("/// @par Paragraph header:\r\n"
+ "/// Paragraph body1\n"
+ "/// Paragraph body2"),
+ ("/// @par Paragraph header:\n\r"
+ "/// Paragraph body1\n"
+ "/// Paragraph body2"),
+ };
+
+ for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+ FullComment *FC = parseString(Sources[i]);
+ ASSERT_TRUE(HasChildCount(FC, 2));
+
+ ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+ {
+ BlockCommandComment *BCC;
+ ParagraphComment *PC;
+ TextComment *TC;
+ ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC));
+ ASSERT_TRUE(HasChildCount(PC, 2));
+ ASSERT_TRUE(BCC->getNumArgs() == 1);
+ ASSERT_TRUE(BCC->getArgText(0) == "Paragraph header:");
+ ASSERT_TRUE(GetChildAt(PC, 0, TC));
+ ASSERT_TRUE(TC->getText() == " Paragraph body1");
+ ASSERT_TRUE(GetChildAt(PC, 1, TC));
+ ASSERT_TRUE(TC->getText() == " Paragraph body2");
+ }
+ }
+}
+
+TEST_F(CommentParserTest, ParCommandHasArg5) {
+ const char *Sources[] = {
+ ("/// @par \n"
+ "/// Paragraphs with no text before newline have no heading"),
+ ("/// @par \r\n"
+ "/// Paragraphs with no text before newline have no heading"),
+ ("/// @par \n\r"
+ "/// Paragraphs with no text before newline have no heading"),
+ };
+
+ for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+ FullComment *FC = parseString(Sources[i]);
+ ASSERT_TRUE(HasChildCount(FC, 2));
+
+ ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+ {
+ BlockCommandComment *BCC;
+ ParagraphComment *PC;
+ TextComment *TC;
+ ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "par", PC));
+ ASSERT_TRUE(HasChildCount(PC, 1));
+ ASSERT_TRUE(BCC->getNumArgs() == 0);
+ ASSERT_TRUE(GetChildAt(PC, 0, TC));
+ ASSERT_TRUE(TC->getText() ==
+ "Paragraphs with no text before newline have no heading");
+ }
+ }
+}
} // unnamed namespace
diff --git a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
index a113b02e19995..07b26dc2f6b8b 100644
--- a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
+++ b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
@@ -44,6 +44,7 @@ void clang::EmitClangCommentCommandInfo(RecordKeeper &Records,
<< Tag.getValueAsBit("IsThrowsCommand") << ", "
<< Tag.getValueAsBit("IsDeprecatedCommand") << ", "
<< Tag.getValueAsBit("IsHeaderfileCommand") << ", "
+ << Tag.getValueAsBit("IsParCommand") << ", "
<< Tag.getValueAsBit("IsEmptyParagraphAllowed") << ", "
<< Tag.getValueAsBit("IsVerbatimBlockCommand") << ", "
<< Tag.getValueAsBit("IsVerbatimBlockEndCommand") << ", "
>From 21705de908ae93272e8c39c2aaef49ea1f086ff6 Mon Sep 17 00:00:00 2001
From: hdoc <github at hdoc.io>
Date: Mon, 6 May 2024 13:57:42 -0700
Subject: [PATCH 2/5] Address review feedback
---
clang/lib/AST/CommentParser.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index bbe93ebc37d13..1158df610d560 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -259,12 +259,11 @@ class TextTokenRetokenizer {
if (Pos.BufferPtr + 1 == Pos.BufferEnd) {
consumeChar();
break;
- } else {
- consumeChar();
}
+ consumeChar();
}
- const unsigned Length = WordText.size();
+ unsigned Length = WordText.size();
if (Length == 0) {
Pos = SavedPos;
return false;
@@ -454,6 +453,7 @@ Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
ArrayRef<Comment::Argument>
Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer,
unsigned NumArgs) {
+ assert(NumArgs > 0);
auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
Comment::Argument[NumArgs];
unsigned ParsedArgs = 0;
>From de602c9c6ce390fd652517939063c21094f87041 Mon Sep 17 00:00:00 2001
From: hdoc <github at hdoc.io>
Date: Mon, 3 Jun 2024 14:31:40 -0700
Subject: [PATCH 3/5] Small fix to par heading check and comments
---
clang/lib/AST/CommentParser.cpp | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index 1158df610d560..24aa1ab8fc189 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -222,18 +222,19 @@ class TextTokenRetokenizer {
return true;
}
- /// Check if this line starts with @par or \par
+ // Check if this line starts with @par or \par
bool startsWithParCommand() {
unsigned Offset = 1;
- /// Skip all whitespace characters at the beginning.
- /// This needs to backtrack because Pos has already advanced past the
- /// actual \par or @par command by the time this function is called.
+ // Skip all whitespace characters at the beginning.
+ // This needs to backtrack because Pos has already advanced past the
+ // actual \par or @par command by the time this function is called.
while (isWhitespace(*(Pos.BufferPtr - Offset)))
Offset++;
- /// Check if next four characters are \par or @par
- llvm::StringRef LineStart(Pos.BufferPtr - 5, 4);
+ // Once we've reached the whitespace, backtrack and check if the previous four
+ // characters are \par or @par.
+ llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4);
return LineStart.starts_with("\\par") || LineStart.starts_with("@par");
}
@@ -253,7 +254,7 @@ class TextTokenRetokenizer {
return false;
// Read until the end of this token, which is effectively the end of the
- // line This gets us the content of the par header, if there is one.
+ // line. This gets us the content of the par header, if there is one.
while (!isEnd()) {
WordText.push_back(peek());
if (Pos.BufferPtr + 1 == Pos.BufferEnd) {
>From a1d5ecaf23f5992b75d66436fe066e57390bb915 Mon Sep 17 00:00:00 2001
From: hdoc <github at hdoc.io>
Date: Sat, 15 Jun 2024 00:38:40 -0700
Subject: [PATCH 4/5] Run clang-format over failing file
Not sure why a failure is being triggered here as we didn't touch this
exact section. Our changes affect code a little further down in the
file.
---
clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
index 07b26dc2f6b8b..f90ebc463005b 100644
--- a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
+++ b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
@@ -32,8 +32,7 @@ void clang::EmitClangCommentCommandInfo(RecordKeeper &Records,
Record &Tag = *Tags[i];
OS << " { "
<< "\"" << Tag.getValueAsString("Name") << "\", "
- << "\"" << Tag.getValueAsString("EndCommandName") << "\", "
- << i << ", "
+ << "\"" << Tag.getValueAsString("EndCommandName") << "\", " << i << ", "
<< Tag.getValueAsInt("NumArgs") << ", "
<< Tag.getValueAsBit("IsInlineCommand") << ", "
<< Tag.getValueAsBit("IsBlockCommand") << ", "
>From ecadf982c454bbfb48865ad849aefaf77cde7764 Mon Sep 17 00:00:00 2001
From: hdoc <github at hdoc.io>
Date: Sat, 15 Jun 2024 00:40:29 -0700
Subject: [PATCH 5/5] More clang-format fixes
---
clang/lib/AST/CommentParser.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index 24aa1ab8fc189..d5e5bb27ceba3 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -232,8 +232,8 @@ class TextTokenRetokenizer {
while (isWhitespace(*(Pos.BufferPtr - Offset)))
Offset++;
- // Once we've reached the whitespace, backtrack and check if the previous four
- // characters are \par or @par.
+ // Once we've skipped back over the whitespace, check whether the four
+ // characters ending at that position are \par or @par.
llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4);
return LineStart.starts_with("\\par") || LineStart.starts_with("@par");
}
More information about the cfe-commits
mailing list