[clang] [Clang][Comments] Add argument parsing for @throw @throws @exception (PR #84726)

via cfe-commits cfe-commits at lists.llvm.org
Sun Mar 24 00:41:34 PDT 2024


https://github.com/hdoc updated https://github.com/llvm/llvm-project/pull/84726

>From ec3f444913d9162de4494cdb09b336b1b00380fa Mon Sep 17 00:00:00 2001
From: hdoc <github at hdoc.io>
Date: Mon, 11 Mar 2024 01:13:25 -0700
Subject: [PATCH 1/5] Comment parsing: add argument parsing for @throw @throws
 @exception

Doxygen allows the @throw, @throws, and @exception commands to take an
argument that names the type being thrown. Currently, Clang's comment
parser does not extract this argument from doc comments, so Clang is
not compatible with Doxygen for these commands.

We would find it helpful if the AST exposed these thrown types as
BlockCommandComment arguments so that we could generate better
documentation.

This PR implements parsing of arguments for the @throw, @throws, and
@exception commands. Each command can only have one argument, matching
the semantics of Doxygen. We have also added unit tests to validate
the functionality.
---
 clang/include/clang/AST/CommentCommands.td |   6 +-
 clang/include/clang/AST/CommentParser.h    |   3 +
 clang/lib/AST/CommentParser.cpp            | 133 ++++++++++++
 clang/unittests/AST/CommentParser.cpp      | 235 ++++++++++++++++++++-
 4 files changed, 373 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/AST/CommentCommands.td b/clang/include/clang/AST/CommentCommands.td
index e839031752cdd8..06b2fa9b5531c6 100644
--- a/clang/include/clang/AST/CommentCommands.td
+++ b/clang/include/clang/AST/CommentCommands.td
@@ -132,9 +132,9 @@ def Tparam : BlockCommand<"tparam"> { let IsTParamCommand = 1; }
 // HeaderDoc command for template parameter documentation.
 def Templatefield : BlockCommand<"templatefield"> { let IsTParamCommand = 1; }
 
-def Throws    : BlockCommand<"throws"> { let IsThrowsCommand = 1; }
-def Throw     : BlockCommand<"throw"> { let IsThrowsCommand = 1; }
-def Exception : BlockCommand<"exception"> { let IsThrowsCommand = 1; }
+def Throws    : BlockCommand<"throws"> { let IsThrowsCommand = 1; let NumArgs = 1; }
+def Throw     : BlockCommand<"throw"> { let IsThrowsCommand = 1; let NumArgs = 1; }
+def Exception : BlockCommand<"exception"> { let IsThrowsCommand = 1; let NumArgs = 1;}
 
 def Deprecated : BlockCommand<"deprecated"> {
   let IsEmptyParagraphAllowed = 1;
diff --git a/clang/include/clang/AST/CommentParser.h b/clang/include/clang/AST/CommentParser.h
index e11e818b1af0a1..5884a25d007851 100644
--- a/clang/include/clang/AST/CommentParser.h
+++ b/clang/include/clang/AST/CommentParser.h
@@ -100,6 +100,9 @@ class Parser {
   ArrayRef<Comment::Argument>
   parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);
 
+  ArrayRef<Comment::Argument>
+  parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);
+
   BlockCommandComment *parseBlockCommand();
   InlineCommandComment *parseInlineCommand();
 
diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index 8adfd85d0160c3..c70fa1b05cb241 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -75,6 +75,25 @@ class TextTokenRetokenizer {
     return *Pos.BufferPtr;
   }
 
+  /// Return the character \p offset past the current one, or '\0' if that
+  /// position is at or beyond the buffer end (which is never dereferenced).
+  char peekNext(unsigned offset) const {
+    assert(!isEnd());
+    assert(Pos.BufferPtr != Pos.BufferEnd);
+    if (offset < static_cast<size_t>(Pos.BufferEnd - Pos.BufferPtr))
+      return *(Pos.BufferPtr + offset);
+    return '\0';
+  }
+
+  /// Append to \p WordText the run of characters that follows the current
+  /// one, stopping before the first whitespace or '\0'. Consumes nothing.
+  void peekNextToken(SmallString<32> &WordText) const {
+    unsigned Ahead = 1;
+    for (char Next = peekNext(Ahead); !isWhitespace(Next) && Next != '\0';
+         Next = peekNext(++Ahead))
+      WordText.push_back(Next);
+  }
+
   void consumeChar() {
     assert(!isEnd());
     assert(Pos.BufferPtr != Pos.BufferEnd);
@@ -89,6 +108,29 @@ class TextTokenRetokenizer {
     }
   }
 
+  /// Extract a template argument list such as "<T>" or "<Container<T>>".
+  ///
+  /// Every character consumed is appended to \p WordText. Lexing stops right
+  /// after the '>' that balances the '<' characters seen so far.
+  ///
+  /// \returns true if that '>' was found, or false if the input ran out
+  /// first, in which case everything up to the end has been consumed.
+  bool lexTemplateType(SmallString<32> &WordText) {
+    unsigned Depth = 0;
+    while (!isEnd()) {
+      const char Current = peek();
+      WordText.push_back(Current);
+      consumeChar();
+      if (Current == '<') {
+        ++Depth;
+      } else if (Current == '>') {
+        if (--Depth == 0)
+          return true;
+      }
+    }
+    return false;
+  }
+
   /// Add a token.
   /// Returns true on success, false if there are no interesting tokens to
   /// fetch from lexer.
@@ -149,6 +191,76 @@ class TextTokenRetokenizer {
     addToken();
   }
 
+  /// Extract a type argument.
+  ///
+  /// Unlike lexWord(), whitespace is kept wherever the type continues past
+  /// it (after a leading "const", before a word ending in '*' or '&', and
+  /// before a "const" that follows '*' or '&'), so "const int * const" is
+  /// one token. On failure -- no input, an empty type, or an unbalanced
+  /// '<' -- returns false with the position restored.
+  bool lexDataType(Token &Tok) {
+    if (isEnd())
+      return false;
+    Position SavedPos = Pos;
+    consumeWhitespace();
+    SmallString<32> NextToken;
+    SmallString<32> WordText;
+    const char *WordBegin = Pos.BufferPtr;
+    SourceLocation Loc = getSourceLocation();
+    StringRef ConstVal = StringRef("const");
+    bool ConstPointer = false;
+
+    while (!isEnd()) {
+      const char C = peek();
+      if (!isWhitespace(C)) {
+        if (C != '<') {
+          WordText.push_back(C);
+          consumeChar();
+        } else if (!lexTemplateType(WordText)) {
+          // Unbalanced '<': rewind so the scanned text is not swallowed.
+          Pos = SavedPos;
+          return false;
+        }
+        continue;
+      }
+      // C is whitespace: decide whether the type continues past it.
+      bool KeepGoing;
+      if (WordText.equals(ConstVal)) {
+        KeepGoing = true; // A leading "const" needs the type after it.
+      } else {
+        NextToken.clear();
+        peekNextToken(NextToken);
+        if (WordText.ends_with(StringRef("*")) ||
+            WordText.ends_with(StringRef("&"))) {
+          // After '*' or '&' only a "const" qualifier extends the type.
+          KeepGoing = NextToken.equals(ConstVal);
+          ConstPointer |= KeepGoing;
+        } else {
+          KeepGoing = (NextToken.ends_with(StringRef("*")) ||
+                       NextToken.ends_with(StringRef("&"))) &&
+                      !ConstPointer;
+        }
+      }
+      consumeChar();
+      if (!KeepGoing)
+        break;
+      WordText.push_back(C);
+    }
+
+    const unsigned Length = WordText.size();
+    if (Length == 0) {
+      Pos = SavedPos;
+      return false;
+    }
+
+    char *TextPtr = Allocator.Allocate<char>(Length + 1);
+    memcpy(TextPtr, WordText.c_str(), Length + 1);
+    StringRef Text = StringRef(TextPtr, Length);
+
+    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
+    return true;
+  }
+
   /// Extract a word -- sequence of non-whitespace characters.
   bool lexWord(Token &Tok) {
     if (isEnd())
@@ -295,6 +407,7 @@ Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
       Comment::Argument[NumArgs];
   unsigned ParsedArgs = 0;
   Token Arg;
+
   while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
     Args[ParsedArgs] = Comment::Argument{
         SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
@@ -304,6 +417,23 @@ Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
   return llvm::ArrayRef(Args, ParsedArgs);
 }
 
+/// Parse up to \p NumArgs type arguments using lexDataType(); stops early at
+/// the first argument that cannot be lexed and returns those parsed so far.
+ArrayRef<Comment::Argument>
+Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
+                              unsigned NumArgs) {
+  void *Storage = Allocator.Allocate<Comment::Argument>(NumArgs);
+  auto *Args = new (Storage) Comment::Argument[NumArgs];
+
+  unsigned Count = 0;
+  for (Token TypeTok; Count < NumArgs && Retokenizer.lexDataType(TypeTok);
+       ++Count) {
+    const SourceRange Range(TypeTok.getLocation(), TypeTok.getEndLocation());
+    Args[Count] = Comment::Argument{Range, TypeTok.getText()};
+  }
+  return llvm::ArrayRef(Args, Count);
+}
+
 BlockCommandComment *Parser::parseBlockCommand() {
   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
 
@@ -356,6 +486,9 @@ BlockCommandComment *Parser::parseBlockCommand() {
       parseParamCommandArgs(PC, Retokenizer);
     else if (TPC)
       parseTParamCommandArgs(TPC, Retokenizer);
+    else if (Info->IsThrowsCommand)
+      S.actOnBlockCommandArgs(
+          BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs));
     else
       S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs));
 
diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp
index c3479672ae2a3c..e01d654aa1cea2 100644
--- a/clang/unittests/AST/CommentParser.cpp
+++ b/clang/unittests/AST/CommentParser.cpp
@@ -1427,8 +1427,241 @@ TEST_F(CommentParserTest, Deprecated) {
   }
 }
 
+// A plain type name is the single argument of @throws, whether it is on the
+// command's own line or on the line after it.
+TEST_F(CommentParserTest, ThrowsCommandHasArg1) {
+  const char *Sources[] = {
+      "/// @throws int This function throws an integer",
+      ("/// @throws\n"
+       "/// int This function throws an integer"),
+      ("/// @throws \n"
+       "/// int This function throws an integer"),
+      ("/// @throws\n"
+       "/// int\n"
+       "/// This function throws an integer"),
+      ("/// @throws \n"
+       "/// int \n"
+       "/// This function throws an integer"),
+  };
+
+  for (const char *Source : Sources) {
+    FullComment *FC = parseString(Source);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+
+    BlockCommandComment *ThrowsCmd;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, ThrowsCmd, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(ThrowsCmd->getNumArgs() == 1);
+    ASSERT_TRUE(ThrowsCmd->getArgText(0) == "int");
+  }
+}
+
+// A const-qualified type ("const int") is kept together as the single
+// argument of @throws.
+TEST_F(CommentParserTest, ThrowsCommandHasArg2) {
+  const char *Sources[] = {
+      "/// @throws const int This function throws a const integer",
+      ("/// @throws\n"
+       "/// const int This function throws a const integer"),
+      ("/// @throws \n"
+       "/// const int This function throws a const integer"),
+      ("/// @throws\n"
+       "/// const int\n"
+       "/// This function throws a const integer"),
+      ("/// @throws \n"
+       "/// const int \n"
+       "/// This function throws a const integer"),
+  };
+
+  for (const char *Source : Sources) {
+    FullComment *FC = parseString(Source);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+
+    BlockCommandComment *ThrowsCmd;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, ThrowsCmd, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(ThrowsCmd->getNumArgs() == 1);
+    ASSERT_TRUE(ThrowsCmd->getArgText(0) == "const int");
+  }
+}
+
+// A pointer declarator separated by spaces ("const int *") stays part of
+// the single @throws argument.
+TEST_F(CommentParserTest, ThrowsCommandHasArg3) {
+  const char *Sources[] = {
+      "/// @throws const int * This function throws a pointer to a const "
+      "integer\n",
+      ("/// @throws\n"
+       "/// const int * This function throws a pointer to a const integer"),
+      ("/// @throws \n"
+       "/// const int * This function throws a pointer to a const integer"),
+      ("/// @throws\n"
+       "/// const int *\n"
+       "/// This function throws a pointer to a const integer"),
+      ("/// @throws \n"
+       "/// const int *\n"
+       "/// This function throws a pointer to a const integer"),
+  };
+
+  for (const char *Source : Sources) {
+    FullComment *FC = parseString(Source);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+
+    BlockCommandComment *ThrowsCmd;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, ThrowsCmd, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(ThrowsCmd->getNumArgs() == 1);
+    ASSERT_TRUE(ThrowsCmd->getArgText(0) == "const int *");
+  }
+}
+
+// A const pointer to const ("const int * const") is kept whole as the
+// single @throws argument.
+TEST_F(CommentParserTest, ThrowsCommandHasArg4) {
+  const char *Sources[] = {
+      "/// @throws const int * const This function throws a const pointer to a "
+      "const integer",
+      ("/// @throws\n"
+       "/// const int * const This function throws a const pointer to a const "
+       "integer"),
+      ("/// @throws \n"
+       "/// const int * const This function throws a const pointer to a const "
+       "integer"),
+      ("/// @throws\n"
+       "/// const int * const\n"
+       "/// This function throws a const pointer to a const integer"),
+      ("/// @throws \n"
+       "/// const int * const\n"
+       "/// This function throws a const pointer to a const integer"),
+  };
+
+  for (const char *Source : Sources) {
+    FullComment *FC = parseString(Source);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+
+    BlockCommandComment *ThrowsCmd;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, ThrowsCmd, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(ThrowsCmd->getNumArgs() == 1);
+    ASSERT_TRUE(ThrowsCmd->getArgText(0) == "const int * const");
+  }
+}
+
+// A double pointer written without spaces ("int**") is the single @throws
+// argument.
+TEST_F(CommentParserTest, ThrowsCommandHasArg5) {
+  const char *Sources[] = {
+      "/// @throws int** This function throws a double pointer to an integer",
+  };
+
+  for (const char *Source : Sources) {
+    FullComment *FC = parseString(Source);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+
+    BlockCommandComment *ThrowsCmd;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, ThrowsCmd, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(ThrowsCmd->getNumArgs() == 1);
+    ASSERT_TRUE(ThrowsCmd->getArgText(0) == "int**");
+  }
+}
+
+// A const-qualified double pointer separated by a space ("const char **")
+// is the single @throws argument.
+TEST_F(CommentParserTest, ThrowsCommandHasArg6) {
+  const char *Sources[] = {
+      "/// @throws const char ** double pointer to a constant char pointer",
+  };
+
+  for (const char *Source : Sources) {
+    FullComment *FC = parseString(Source);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+
+    BlockCommandComment *ThrowsCmd;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, ThrowsCmd, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(ThrowsCmd->getNumArgs() == 1);
+    ASSERT_TRUE(ThrowsCmd->getArgText(0) == "const char **");
+  }
+}
+
+// A template type with one argument ("Error<T>") is the single @throws
+// argument.
+TEST_F(CommentParserTest, ThrowsCommandHasArg7) {
+  const char *Sources[] = {
+      "/// @throws Error<T> error of type Error<T>",
+  };
+
+  for (const char *Source : Sources) {
+    FullComment *FC = parseString(Source);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+
+    BlockCommandComment *ThrowsCmd;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, ThrowsCmd, "throws", Body));
+    // Extra children because <T> is parsed as a series of TextComments.
+    ASSERT_TRUE(HasChildCount(Body, 3));
+    ASSERT_TRUE(ThrowsCmd->getNumArgs() == 1);
+    ASSERT_TRUE(ThrowsCmd->getArgText(0) == "Error<T>");
+  }
+}
+
+// A nested template type ("Error<Container<T>>") is the single @throws
+// argument.
+TEST_F(CommentParserTest, ThrowsCommandHasArg8) {
+  const char *Sources[] = {
+      "/// @throws Error<Container<T>> nested templates",
+  };
+
+  for (const char *Source : Sources) {
+    FullComment *FC = parseString(Source);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+
+    BlockCommandComment *ThrowsCmd;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, ThrowsCmd, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(ThrowsCmd->getNumArgs() == 1);
+    ASSERT_TRUE(ThrowsCmd->getArgText(0) == "Error<Container<T>>");
+  }
+}
+
+// A template type with a pack expansion ("Error<Ts...>") is the single
+// @throws argument.
+TEST_F(CommentParserTest, ThrowsCommandHasArg9) {
+  const char *Sources[] = {
+      "/// @throws Error<Ts...> variadic templates",
+  };
+
+  for (const char *Source : Sources) {
+    FullComment *FC = parseString(Source);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+
+    BlockCommandComment *ThrowsCmd;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, ThrowsCmd, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(ThrowsCmd->getNumArgs() == 1);
+    ASSERT_TRUE(ThrowsCmd->getArgText(0) == "Error<Ts...>");
+  }
+}
+
 } // unnamed namespace
 
 } // end namespace comments
 } // end namespace clang
-

>From 3463833cb0d9c78d04bcbb9e6a46bd0a0c3e2528 Mon Sep 17 00:00:00 2001
From: hdoc <github at hdoc.io>
Date: Mon, 11 Mar 2024 16:13:33 -0700
Subject: [PATCH 2/5] Fix comment to XML tests

---
 .../Index/comment-to-html-xml-conversion.cpp  | 53 ++++++++-----------
 1 file changed, 21 insertions(+), 32 deletions(-)

diff --git a/clang/test/Index/comment-to-html-xml-conversion.cpp b/clang/test/Index/comment-to-html-xml-conversion.cpp
index d9eefb909653c7..291aecf44d1129 100644
--- a/clang/test/Index/comment-to-html-xml-conversion.cpp
+++ b/clang/test/Index/comment-to-html-xml-conversion.cpp
@@ -1046,82 +1046,71 @@ void comment_to_xml_conversion_todo_4();
 /// Aaa.
 /// \throws Bbb.
 void comment_to_xml_conversion_exceptions_1();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_1:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_1</Name><USR>c:@F@comment_to_xml_conversion_exceptions_1#</USR><Declaration>void comment_to_xml_conversion_exceptions_1()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para> Bbb.</Para></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_1:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_1</Name><USR>c:@F@comment_to_xml_conversion_exceptions_1#</USR><Declaration>void comment_to_xml_conversion_exceptions_1()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
 // CHECK-NEXT:         (CXComment_Text Text=[ Aaa.] HasTrailingNewline)
 // CHECK-NEXT:         (CXComment_Text Text=[ ] IsWhitespace))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Bbb.]))))]
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace)))]
 
 /// Aaa.
 /// \throw Bbb.
 void comment_to_xml_conversion_exceptions_2();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_2:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_2</Name><USR>c:@F@comment_to_xml_conversion_exceptions_2#</USR><Declaration>void comment_to_xml_conversion_exceptions_2()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para> Bbb.</Para></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_2:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_2</Name><USR>c:@F@comment_to_xml_conversion_exceptions_2#</USR><Declaration>void comment_to_xml_conversion_exceptions_2()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
 // CHECK-NEXT:         (CXComment_Text Text=[ Aaa.] HasTrailingNewline)
 // CHECK-NEXT:         (CXComment_Text Text=[ ] IsWhitespace))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throw]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Bbb.]))))]
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throw] Arg[0]=Bbb.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace)))]
 
 /// Aaa.
 /// \exception Bbb.
 void comment_to_xml_conversion_exceptions_3();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_3:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_3</Name><USR>c:@F@comment_to_xml_conversion_exceptions_3#</USR><Declaration>void comment_to_xml_conversion_exceptions_3()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para> Bbb.</Para></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_3:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_3</Name><USR>c:@F@comment_to_xml_conversion_exceptions_3#</USR><Declaration>void comment_to_xml_conversion_exceptions_3()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
 // CHECK-NEXT:         (CXComment_Text Text=[ Aaa.] HasTrailingNewline)
 // CHECK-NEXT:         (CXComment_Text Text=[ ] IsWhitespace))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[exception]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Bbb.]))))]
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[exception] Arg[0]=Bbb.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace)))]
 
 /// Aaa.
 /// \throws Bbb.
 /// \throws Ccc.
 /// \throws Ddd.
 void comment_to_xml_conversion_exceptions_4();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_4:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_4</Name><USR>c:@F@comment_to_xml_conversion_exceptions_4#</USR><Declaration>void comment_to_xml_conversion_exceptions_4()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para> Bbb. </Para><Para> Ccc. </Para><Para> Ddd.</Para></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_4:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_4</Name><USR>c:@F@comment_to_xml_conversion_exceptions_4#</USR><Declaration>void comment_to_xml_conversion_exceptions_4()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
 // CHECK-NEXT:         (CXComment_Text Text=[ Aaa.] HasTrailingNewline)
 // CHECK-NEXT:         (CXComment_Text Text=[ ] IsWhitespace))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Bbb.] HasTrailingNewline)
-// CHECK-NEXT:           (CXComment_Text Text=[ ] IsWhitespace)))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Ccc.] HasTrailingNewline)
-// CHECK-NEXT:           (CXComment_Text Text=[ ] IsWhitespace)))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Ddd.]))))]
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace))
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws] Arg[0]=Ccc.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace))
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws] Arg[0]=Ddd.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace)))]
 
 /// Aaa.
 /// \throws Bbb.
 /// \throw Ccc.
 void comment_to_xml_conversion_exceptions_5();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_5:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_5</Name><USR>c:@F@comment_to_xml_conversion_exceptions_5#</USR><Declaration>void comment_to_xml_conversion_exceptions_5()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para> Bbb. </Para><Para> Ccc.</Para></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_5:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_5</Name><USR>c:@F@comment_to_xml_conversion_exceptions_5#</USR><Declaration>void comment_to_xml_conversion_exceptions_5()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
 // CHECK-NEXT:         (CXComment_Text Text=[ Aaa.] HasTrailingNewline)
 // CHECK-NEXT:         (CXComment_Text Text=[ ] IsWhitespace))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Bbb.] HasTrailingNewline)
-// CHECK-NEXT:           (CXComment_Text Text=[ ] IsWhitespace)))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throw]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Ccc.]))))]
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace))
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throw] Arg[0]=Ccc.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace)))]
 
 #endif
 

>From cc6591c797328447e169029025f6e68918d7f074 Mon Sep 17 00:00:00 2001
From: hdoc <github at hdoc.io>
Date: Tue, 12 Mar 2024 20:15:10 -0700
Subject: [PATCH 3/5] Update XML output routine for Throws commands to fix XML
 validation

XML validation was failing because the <Exceptions> element was emitted
empty: the exception type is now parsed as a command argument instead of
being part of the ParagraphComment.

This was a side effect of the change we made to argument parsing.

To fix it, I updated the XML output so that the argument text is still
emitted inside the <Para> element, as it was before.
---
 clang/lib/Index/CommentToXML.cpp              | 34 ++++++++++-----
 .../Index/comment-to-html-xml-conversion.cpp  | 42 ++++++++++++++++---
 2 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/clang/lib/Index/CommentToXML.cpp b/clang/lib/Index/CommentToXML.cpp
index 295f3f228ff79b..7908311b35d012 100644
--- a/clang/lib/Index/CommentToXML.cpp
+++ b/clang/lib/Index/CommentToXML.cpp
@@ -545,7 +545,8 @@ class CommentASTToXMLConverter :
   void visitParagraphComment(const ParagraphComment *C);
 
   void appendParagraphCommentWithKind(const ParagraphComment *C,
-                                      StringRef Kind);
+                                      StringRef ParagraphKind,
+                                      StringRef PrependBodyText);
 
   void visitBlockCommandComment(const BlockCommandComment *C);
   void visitParamCommandComment(const ParamCommandComment *C);
@@ -679,15 +680,15 @@ CommentASTToXMLConverter::visitHTMLEndTagComment(const HTMLEndTagComment *C) {
   Result << "></" << C->getTagName() << "></rawHTML>";
 }
 
-void
-CommentASTToXMLConverter::visitParagraphComment(const ParagraphComment *C) {
-  appendParagraphCommentWithKind(C, StringRef());
+void CommentASTToXMLConverter::visitParagraphComment(
+    const ParagraphComment *C) {
+  appendParagraphCommentWithKind(C, StringRef(), StringRef());
 }
 
 void CommentASTToXMLConverter::appendParagraphCommentWithKind(
-                                  const ParagraphComment *C,
-                                  StringRef ParagraphKind) {
-  if (C->isWhitespace())
+    const ParagraphComment *C, StringRef ParagraphKind,
+    StringRef PrependBodyText) {
+  if (C->isWhitespace() && PrependBodyText.empty())
     return;
 
   if (ParagraphKind.empty())
@@ -695,8 +696,11 @@ void CommentASTToXMLConverter::appendParagraphCommentWithKind(
   else
     Result << "<Para kind=\"" << ParagraphKind << "\">";
 
-  for (Comment::child_iterator I = C->child_begin(), E = C->child_end();
-       I != E; ++I) {
+  if (!PrependBodyText.empty())
+    Result << PrependBodyText << " ";
+
+  for (Comment::child_iterator I = C->child_begin(), E = C->child_end(); I != E;
+       ++I) {
     visit(*I);
   }
   Result << "</Para>";
@@ -705,8 +709,15 @@ void CommentASTToXMLConverter::appendParagraphCommentWithKind(
 void CommentASTToXMLConverter::visitBlockCommandComment(
     const BlockCommandComment *C) {
   StringRef ParagraphKind;
+  StringRef ExceptionType;
 
-  switch (C->getCommandID()) {
+  const unsigned CommandID = C->getCommandID();
+  const CommandInfo *Info = Traits.getCommandInfo(CommandID);
+  if (Info->IsThrowsCommand && C->getNumArgs() > 0) {
+    ExceptionType = C->getArgText(0);
+  }
+
+  switch (CommandID) {
   case CommandTraits::KCI_attention:
   case CommandTraits::KCI_author:
   case CommandTraits::KCI_authors:
@@ -731,7 +742,8 @@ void CommentASTToXMLConverter::visitBlockCommandComment(
     break;
   }
 
-  appendParagraphCommentWithKind(C->getParagraph(), ParagraphKind);
+  appendParagraphCommentWithKind(C->getParagraph(), ParagraphKind,
+                                 ExceptionType);
 }
 
 void CommentASTToXMLConverter::visitParamCommandComment(
diff --git a/clang/test/Index/comment-to-html-xml-conversion.cpp b/clang/test/Index/comment-to-html-xml-conversion.cpp
index 291aecf44d1129..e0a7cff5a9a3db 100644
--- a/clang/test/Index/comment-to-html-xml-conversion.cpp
+++ b/clang/test/Index/comment-to-html-xml-conversion.cpp
@@ -1046,7 +1046,7 @@ void comment_to_xml_conversion_todo_4();
 /// Aaa.
 /// \throws Bbb.
 void comment_to_xml_conversion_exceptions_1();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_1:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_1</Name><USR>c:@F@comment_to_xml_conversion_exceptions_1#</USR><Declaration>void comment_to_xml_conversion_exceptions_1()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_1:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_1</Name><USR>c:@F@comment_to_xml_conversion_exceptions_1#</USR><Declaration>void comment_to_xml_conversion_exceptions_1()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para>Bbb. </Para></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
@@ -1058,7 +1058,7 @@ void comment_to_xml_conversion_exceptions_1();
 /// Aaa.
 /// \throw Bbb.
 void comment_to_xml_conversion_exceptions_2();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_2:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_2</Name><USR>c:@F@comment_to_xml_conversion_exceptions_2#</USR><Declaration>void comment_to_xml_conversion_exceptions_2()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_2:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_2</Name><USR>c:@F@comment_to_xml_conversion_exceptions_2#</USR><Declaration>void comment_to_xml_conversion_exceptions_2()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para>Bbb. </Para></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
@@ -1070,7 +1070,7 @@ void comment_to_xml_conversion_exceptions_2();
 /// Aaa.
 /// \exception Bbb.
 void comment_to_xml_conversion_exceptions_3();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_3:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_3</Name><USR>c:@F@comment_to_xml_conversion_exceptions_3#</USR><Declaration>void comment_to_xml_conversion_exceptions_3()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_3:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_3</Name><USR>c:@F@comment_to_xml_conversion_exceptions_3#</USR><Declaration>void comment_to_xml_conversion_exceptions_3()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para>Bbb. </Para></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
@@ -1084,7 +1084,7 @@ void comment_to_xml_conversion_exceptions_3();
 /// \throws Ccc.
 /// \throws Ddd.
 void comment_to_xml_conversion_exceptions_4();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_4:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_4</Name><USR>c:@F@comment_to_xml_conversion_exceptions_4#</USR><Declaration>void comment_to_xml_conversion_exceptions_4()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_4:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_4</Name><USR>c:@F@comment_to_xml_conversion_exceptions_4#</USR><Declaration>void comment_to_xml_conversion_exceptions_4()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para>Bbb. </Para><Para>Ccc. </Para><Para>Ddd. </Para></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
@@ -1101,7 +1101,7 @@ void comment_to_xml_conversion_exceptions_4();
 /// \throws Bbb.
 /// \throw Ccc.
 void comment_to_xml_conversion_exceptions_5();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_5:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_5</Name><USR>c:@F@comment_to_xml_conversion_exceptions_5#</USR><Declaration>void comment_to_xml_conversion_exceptions_5()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_5:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_5</Name><USR>c:@F@comment_to_xml_conversion_exceptions_5#</USR><Declaration>void comment_to_xml_conversion_exceptions_5()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para>Bbb. </Para><Para>Ccc. </Para></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
@@ -1112,5 +1112,35 @@ void comment_to_xml_conversion_exceptions_5();
 // CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throw] Arg[0]=Ccc.
 // CHECK-NEXT:         (CXComment_Paragraph IsWhitespace)))]
 
-#endif
+/// Aaa.
+/// \throws Bbb subsequent arg text
+void comment_to_xml_conversion_exceptions_6();
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_6:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_6</Name><USR>c:@F@comment_to_xml_conversion_exceptions_6#</USR><Declaration>void comment_to_xml_conversion_exceptions_6()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para>Bbb subsequent arg text</Para></Exceptions></Function>]
+// CHECK-NEXT:  CommentAST=[
+// CHECK-NEXT:    (CXComment_FullComment
+// CHECK-NEXT:       (CXComment_Paragraph
+// CHECK-NEXT:         (CXComment_Text Text=[ Aaa.] HasTrailingNewline)
+// CHECK-NEXT:         (CXComment_Text Text=[ ] IsWhitespace))
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb
+// CHECK-NEXT:         (CXComment_Paragraph
+// CHECK-NEXT:           (CXComment_Text Text=[subsequent arg text]))))]
 
+/// Aaa.
+/// \throws Bbb subsequent arg text
+/// \throw Ccc subsequent arg text
+void comment_to_xml_conversion_exceptions_7();
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_7:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_7</Name><USR>c:@F@comment_to_xml_conversion_exceptions_7#</USR><Declaration>void comment_to_xml_conversion_exceptions_7()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para>Bbb subsequent arg text </Para><Para>Ccc subsequent arg text</Para></Exceptions></Function>]
+// CHECK-NEXT:  CommentAST=[
+// CHECK-NEXT:    (CXComment_FullComment
+// CHECK-NEXT:       (CXComment_Paragraph
+// CHECK-NEXT:         (CXComment_Text Text=[ Aaa.] HasTrailingNewline)
+// CHECK-NEXT:         (CXComment_Text Text=[ ] IsWhitespace))
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb
+// CHECK-NEXT:         (CXComment_Paragraph
+// CHECK-NEXT:           (CXComment_Text Text=[subsequent arg text] HasTrailingNewline)
+// CHECK-NEXT:           (CXComment_Text Text=[ ] IsWhitespace)))
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throw] Arg[0]=Ccc
+// CHECK-NEXT:         (CXComment_Paragraph
+// CHECK-NEXT:           (CXComment_Text Text=[subsequent arg text]))))]
+
+#endif

>From 3a4f7bde2766157564475a5f12bc8db8d11a8778 Mon Sep 17 00:00:00 2001
From: hdoc <github at hdoc.io>
Date: Fri, 15 Mar 2024 11:42:50 -0700
Subject: [PATCH 4/5] Support more qualifiers, such as volatile

---
 clang/lib/AST/CommentParser.cpp       | 99 ++++++++++++++++++++-------
 clang/unittests/AST/CommentParser.cpp | 86 +++++++++++++++++++++++
 2 files changed, 162 insertions(+), 23 deletions(-)

diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index c70fa1b05cb241..a2aa3cfc6d5b53 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -131,6 +131,48 @@ class TextTokenRetokenizer {
     return false;
   }
 
+  bool isDataTypeQualifier(SmallString<32> &WordText) {
+    if (WordText.ends_with(StringRef("const")))
+      return true;
+    if (WordText.ends_with(StringRef("volatile")))
+      return true;
+    if (WordText.ends_with(StringRef("unsigned")))
+      return true;
+    if (WordText.ends_with(StringRef("signed")))
+      return true;
+    if (WordText.ends_with(StringRef("long")))
+      return true;
+    if (WordText.ends_with(StringRef("short")))
+      return true;
+    if (WordText.ends_with(StringRef("restrict")))
+      return true;
+    if (WordText.ends_with(StringRef("auto")))
+      return true;
+    if (WordText.ends_with(StringRef("register")))
+      return true;
+    if (WordText.ends_with(StringRef("static")))
+      return true;
+    if (WordText.ends_with(StringRef("extern")))
+      return true;
+    if (WordText.ends_with(StringRef("struct")))
+      return true;
+    if (WordText.ends_with(StringRef("typedef")))
+      return true;
+    if (WordText.ends_with(StringRef("union")))
+      return true;
+    if (WordText.ends_with(StringRef("void")))
+      return true;
+    return false;
+  }
+
+  bool isScopeResolutionOperator(SmallString<32> &WordText) {
+    return WordText.ends_with(StringRef("::"));
+  }
+
+  bool continueParsing(SmallString<32> &WordText) {
+    return isDataTypeQualifier(WordText) || isScopeResolutionOperator(WordText);
+  }
+
   /// Add a token.
   /// Returns true on success, false if there are no interesting tokens to
   /// fetch from lexer.
@@ -192,7 +234,7 @@ class TextTokenRetokenizer {
   }
 
   /// Extract a type argument
-  bool lexDataType(Token &Tok) {
+  bool lexType(Token &Tok) {
     if (isEnd())
       return false;
     Position SavedPos = Pos;
@@ -202,6 +244,8 @@ class TextTokenRetokenizer {
     const char *WordBegin = Pos.BufferPtr;
     SourceLocation Loc = getSourceLocation();
     StringRef ConstVal = StringRef("const");
+    StringRef PointerVal = StringRef("*");
+    StringRef ReferenceVal = StringRef("&");
     bool ConstPointer = false;
 
     while (!isEnd()) {
@@ -215,32 +259,41 @@ class TextTokenRetokenizer {
           consumeChar();
         }
       } else {
-        if (WordText.equals(ConstVal)) {
-          WordText.push_back(C);
+        if (ConstPointer) {
           consumeChar();
-        } else if (WordText.ends_with(StringRef("*")) ||
-                   WordText.ends_with(StringRef("&"))) {
-          NextToken.clear();
-          peekNextToken(NextToken);
-          if (NextToken.equals(ConstVal)) {
-            ConstPointer = true;
-            WordText.push_back(C);
-            consumeChar();
-          } else {
-            consumeChar();
-            break;
-          }
+          break;
         } else {
-          NextToken.clear();
-          peekNextToken(NextToken);
-          if ((NextToken.ends_with(StringRef("*")) ||
-               NextToken.ends_with(StringRef("&"))) &&
-              !ConstPointer) {
+          if (continueParsing(WordText)) {
             WordText.push_back(C);
             consumeChar();
           } else {
-            consumeChar();
-            break;
+            NextToken.clear();
+            peekNextToken(NextToken);
+            if (WordText.ends_with(PointerVal) ||
+                WordText.ends_with(ReferenceVal)) {
+              if (NextToken.equals(ConstVal)) {
+                ConstPointer = true;
+                WordText.push_back(C);
+                consumeChar();
+              } else {
+                consumeChar();
+                break;
+              }
+            } else {
+              if ((NextToken.ends_with(PointerVal) ||
+                   NextToken.ends_with(ReferenceVal))) {
+                WordText.push_back(C);
+                consumeChar();
+              } else {
+                if (continueParsing(NextToken)) {
+                  WordText.push_back(C);
+                  consumeChar();
+                } else {
+                  consumeChar();
+                  break;
+                }
+              }
+            }
           }
         }
       }
@@ -425,7 +478,7 @@ Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
   unsigned ParsedArgs = 0;
   Token Arg;
 
-  while (ParsedArgs < NumArgs && Retokenizer.lexDataType(Arg)) {
+  while (ParsedArgs < NumArgs && Retokenizer.lexType(Arg)) {
     Args[ParsedArgs] = Comment::Argument{
         SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
     ParsedArgs++;
diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp
index e01d654aa1cea2..e2e77dd95a9a50 100644
--- a/clang/unittests/AST/CommentParser.cpp
+++ b/clang/unittests/AST/CommentParser.cpp
@@ -1661,6 +1661,92 @@ TEST_F(CommentParserTest, ThrowsCommandHasArg9) {
   }
 }
 
+TEST_F(CommentParserTest, ThrowsCommandHasArg10) {
+  const char *Sources[] = {
+      "/// @throws const std::map<int, std::string> * pointer to a const map",
+  };
+
+  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+    FullComment *FC = parseString(Sources[i]);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    {
+      BlockCommandComment *BCC;
+      ParagraphComment *PC;
+      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+      ASSERT_TRUE(HasChildCount(PC, 1));
+      ASSERT_TRUE(BCC->getNumArgs() == 1);
+      ASSERT_TRUE(BCC->getArgText(0) == "const std::map<int, std::string> *");
+    }
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg11) {
+  const char *Sources[] = {
+      "/// @throws const std :: map<int, std :: string> * pointer to a "
+      "const map with spaces",
+  };
+
+  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+    FullComment *FC = parseString(Sources[i]);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    {
+      BlockCommandComment *BCC;
+      ParagraphComment *PC;
+      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+      ASSERT_TRUE(HasChildCount(PC, 1));
+      ASSERT_TRUE(BCC->getNumArgs() == 1);
+      ASSERT_TRUE(BCC->getArgText(0) ==
+                  "const std :: map<int, std :: string> *");
+    }
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg12) {
+  const char *Sources[] = {
+      "/// @throws volatile int a volatile integer",
+  };
+
+  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+    FullComment *FC = parseString(Sources[i]);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    {
+      BlockCommandComment *BCC;
+      ParagraphComment *PC;
+      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+      ASSERT_TRUE(HasChildCount(PC, 1));
+      ASSERT_TRUE(BCC->getNumArgs() == 1);
+      ASSERT_TRUE(BCC->getArgText(0) == "volatile int");
+    }
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg13) {
+  const char *Sources[] = {
+      "/// @throws volatile double * volatile pointer to a double",
+  };
+
+  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+    FullComment *FC = parseString(Sources[i]);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    {
+      BlockCommandComment *BCC;
+      ParagraphComment *PC;
+      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+      ASSERT_TRUE(HasChildCount(PC, 1));
+      ASSERT_TRUE(BCC->getNumArgs() == 1);
+      ASSERT_TRUE(BCC->getArgText(0) == "volatile double *");
+    }
+  }
+}
+
 } // unnamed namespace
 
 } // end namespace comments

>From fee8def56a6b32a96551d1d0738ab695bd90b2f9 Mon Sep 17 00:00:00 2001
From: hdoc <github at hdoc.io>
Date: Sun, 24 Mar 2024 00:39:56 -0700
Subject: [PATCH 5/5] Refactor integer argument parsing and add corresponding
 test cases

---
 clang/lib/AST/CommentParser.cpp       | 127 ++++++++++++++++++--------
 clang/unittests/AST/CommentParser.cpp |  42 +++++++++
 2 files changed, 130 insertions(+), 39 deletions(-)

diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index a2aa3cfc6d5b53..63baebe59cdfd9 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -108,69 +108,112 @@ class TextTokenRetokenizer {
     }
   }
 
+  bool continueInt(SmallString<32> &NextToken) {
+    return NextToken.ends_with(StringRef("char")) ||
+           NextToken.ends_with(StringRef("int")) ||
+           NextToken.ends_with(StringRef("char*")) ||
+           NextToken.ends_with(StringRef("int*")) ||
+           NextToken.ends_with(StringRef("char&")) ||
+           NextToken.ends_with(StringRef("int&"));
+  }
+
+  bool lexInt(SmallString<32> &WordText, SmallString<32> &NextToken) {
+    unsigned LongCounter = (WordText.ends_with(StringRef("long"))) ? 1 : 0;
+    bool complete = false;
+
+    while (!isEnd()) {
+      const char C = peek();
+      if (!isWhitespace(C)) {
+        WordText.push_back(C);
+        consumeChar();
+      } else {
+
+        NextToken.clear();
+        peekNextToken(NextToken);
+
+        if (WordText.ends_with(StringRef("long"))) {
+          LongCounter++;
+          if (continueInt(NextToken)) {
+            WordText.push_back(C);
+            consumeChar();
+            complete = true;
+            continue;
+          } else {
+            if (LongCounter == 2) {
+              return true;
+            }
+          }
+        } else {
+
+          if (complete || continueInt(WordText)) {
+            return true;
+          }
+        }
+
+        if (NextToken.ends_with(StringRef("long"))) {
+          WordText.push_back(C);
+          consumeChar();
+        } else {
+          return true;
+        }
+      }
+    }
+
+    return false;
+  }
+
   /// Extract a template type
-  bool lexTemplateType(SmallString<32> &WordText) {
+  bool lexTemplate(SmallString<32> &WordText) {
     unsigned IncrementCounter = 0;
     while (!isEnd()) {
       const char C = peek();
       WordText.push_back(C);
       consumeChar();
       switch (C) {
-      default:
-        break;
       case '<': {
         IncrementCounter++;
-      } break;
+        break;
+      }
       case '>': {
         IncrementCounter--;
         if (!IncrementCounter)
           return true;
-      } break;
+        break;
+      }
+      default:
+        break;
       }
     }
     return false;
   }
 
-  bool isDataTypeQualifier(SmallString<32> &WordText) {
-    if (WordText.ends_with(StringRef("const")))
-      return true;
-    if (WordText.ends_with(StringRef("volatile")))
-      return true;
-    if (WordText.ends_with(StringRef("unsigned")))
-      return true;
-    if (WordText.ends_with(StringRef("signed")))
-      return true;
-    if (WordText.ends_with(StringRef("long")))
-      return true;
-    if (WordText.ends_with(StringRef("short")))
-      return true;
-    if (WordText.ends_with(StringRef("restrict")))
-      return true;
-    if (WordText.ends_with(StringRef("auto")))
-      return true;
-    if (WordText.ends_with(StringRef("register")))
-      return true;
-    if (WordText.ends_with(StringRef("static")))
-      return true;
-    if (WordText.ends_with(StringRef("extern")))
-      return true;
-    if (WordText.ends_with(StringRef("struct")))
-      return true;
-    if (WordText.ends_with(StringRef("typedef")))
-      return true;
-    if (WordText.ends_with(StringRef("union")))
-      return true;
-    if (WordText.ends_with(StringRef("void")))
-      return true;
-    return false;
+  bool isTypeQualifier(SmallString<32> &WordText) {
+    return WordText.ends_with(StringRef("const")) ||
+           WordText.ends_with(StringRef("volatile")) ||
+           WordText.ends_with(StringRef("short")) ||
+           WordText.ends_with(StringRef("restrict")) ||
+           WordText.ends_with(StringRef("auto")) ||
+           WordText.ends_with(StringRef("register")) ||
+           WordText.ends_with(StringRef("static")) ||
+           WordText.ends_with(StringRef("extern")) ||
+           WordText.ends_with(StringRef("struct")) ||
+           WordText.ends_with(StringRef("typedef")) ||
+           WordText.ends_with(StringRef("union")) ||
+           WordText.ends_with(StringRef("void"));
   }
 
   bool isScopeResolutionOperator(SmallString<32> &WordText) {
     return WordText.ends_with(StringRef("::"));
   }
 
+  bool isInt(SmallString<32> &WordText) {
+    return WordText.ends_with(StringRef("unsigned")) ||
+           WordText.ends_with(StringRef("long")) ||
+           WordText.ends_with(StringRef("signed"));
+  }
+
   bool continueParsing(SmallString<32> &WordText) {
-    return isDataTypeQualifier(WordText) || isScopeResolutionOperator(WordText);
+    return isTypeQualifier(WordText) || isScopeResolutionOperator(WordText);
   }
 
   /// Add a token.
@@ -252,7 +295,7 @@ class TextTokenRetokenizer {
       const char C = peek();
       if (!isWhitespace(C)) {
         if (C == '<') {
-          if (!lexTemplateType(WordText))
+          if (!lexTemplate(WordText))
             return false;
         } else {
           WordText.push_back(C);
@@ -263,6 +306,12 @@ class TextTokenRetokenizer {
           consumeChar();
           break;
         } else {
+          if (isInt(WordText)) {
+            WordText.push_back(C);
+            consumeChar();
+            if (!lexInt(WordText, NextToken))
+              return false;
+          }
           if (continueParsing(WordText)) {
             WordText.push_back(C);
             consumeChar();
diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp
index e2e77dd95a9a50..385a98e2a036b9 100644
--- a/clang/unittests/AST/CommentParser.cpp
+++ b/clang/unittests/AST/CommentParser.cpp
@@ -1747,6 +1747,48 @@ TEST_F(CommentParserTest, ThrowsCommandHasArg13) {
   }
 }
 
+TEST_F(CommentParserTest, ThrowsCommandHasArg14) {
+  const char *Sources[] = {
+      "/// @throws unsigned long at least a 32-bit integer",
+  };
+
+  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+    FullComment *FC = parseString(Sources[i]);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    {
+      BlockCommandComment *BCC;
+      ParagraphComment *PC;
+      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+      ASSERT_TRUE(HasChildCount(PC, 1));
+      ASSERT_TRUE(BCC->getNumArgs() == 1);
+      ASSERT_TRUE(BCC->getArgText(0) == "unsigned long");
+    }
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg15) {
+  const char *Sources[] = {
+      "/// @throws unsigned long long at least a 64-bit integer",
+  };
+
+  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+    FullComment *FC = parseString(Sources[i]);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    {
+      BlockCommandComment *BCC;
+      ParagraphComment *PC;
+      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+      ASSERT_TRUE(HasChildCount(PC, 1));
+      ASSERT_TRUE(BCC->getNumArgs() == 1);
+      ASSERT_TRUE(BCC->getArgText(0) == "unsigned long long");
+    }
+  }
+}
+
 } // unnamed namespace
 
 } // end namespace comments



More information about the cfe-commits mailing list