r176461 - Comment parsing: refactor handling of command markers in AST

Dmitri Gribenko gribozavr at gmail.com
Mon Mar 4 15:06:15 PST 2013


Author: gribozavr
Date: Mon Mar  4 17:06:15 2013
New Revision: 176461

URL: http://llvm.org/viewvc/llvm-project?rev=176461&view=rev
Log:
Comment parsing: refactor handling of command markers in AST

* Use the term 'command marker', because the semantics of 'backslash' and 'at'
  commands are the same.  (Talking about 'at commands' makes them look like a
  special entity.)

* Sink the flag down into bitfields, reducing the size of AST nodes.

* Change the flag into an enum for clarity.  Boolean function parameters are
  not very clear.

* Add unittests for new tok::at_command tokens.

Modified:
    cfe/trunk/include/clang/AST/Comment.h
    cfe/trunk/include/clang/AST/CommentLexer.h
    cfe/trunk/include/clang/AST/CommentSema.h
    cfe/trunk/lib/AST/CommentLexer.cpp
    cfe/trunk/lib/AST/CommentParser.cpp
    cfe/trunk/lib/AST/CommentSema.cpp
    cfe/trunk/unittests/AST/CommentLexer.cpp

Modified: cfe/trunk/include/clang/AST/Comment.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/Comment.h?rev=176461&r1=176460&r2=176461&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/Comment.h (original)
+++ cfe/trunk/include/clang/AST/Comment.h Mon Mar  4 17:06:15 2013
@@ -28,6 +28,26 @@ class TemplateParameterList;
 
 namespace comments {
 class FullComment;
+
+/// Describes the syntax that was used in a documentation command.
+///
+/// Exact values of this enumeration are important because they used to select
+/// parts of diagnostic messages.  Audit diagnostics before changing or adding
+/// a new value.
+enum CommandMarkerKind {
+  /// Command started with a backslash character:
+  /// \code
+  ///   \foo
+  /// \endcode
+  CMK_Backslash = 0,
+
+  /// Command started with an 'at' character:
+  /// \code
+  ///   @foo
+  /// \endcode
+  CMK_At = 1
+};
+
 /// Any part of the comment.
 /// Abstract class.
 class Comment {
@@ -110,8 +130,12 @@ protected:
     unsigned : NumCommentBits;
 
     unsigned CommandID : 8;
+
+    /// Describes the syntax that was used in a documentation command.
+    /// Contains values from CommandMarkerKind enum.
+    unsigned CommandMarker : 1;
   };
-  enum { NumBlockCommandCommentBits = NumCommentBits + 8 };
+  enum { NumBlockCommandCommentBits = NumCommentBits + 9 };
 
   class ParamCommandCommentBitfields {
     friend class ParamCommandComment;
@@ -570,30 +594,29 @@ protected:
 
   /// Paragraph argument.
   ParagraphComment *Paragraph;
-  
-  /// Header Doc command, if true
-  bool AtCommand;
-  
+
   BlockCommandComment(CommentKind K,
                       SourceLocation LocBegin,
                       SourceLocation LocEnd,
                       unsigned CommandID,
-                      bool AtCommand) :
+                      CommandMarkerKind CommandMarker) :
       BlockContentComment(K, LocBegin, LocEnd),
-      Paragraph(NULL), AtCommand(AtCommand) {
+      Paragraph(NULL) {
     setLocation(getCommandNameBeginLoc());
     BlockCommandCommentBits.CommandID = CommandID;
+    BlockCommandCommentBits.CommandMarker = CommandMarker;
   }
 
 public:
   BlockCommandComment(SourceLocation LocBegin,
                       SourceLocation LocEnd,
                       unsigned CommandID,
-                      bool AtCommand) :
+                      CommandMarkerKind CommandMarker) :
       BlockContentComment(BlockCommandCommentKind, LocBegin, LocEnd),
-      Paragraph(NULL), AtCommand(AtCommand) {
+      Paragraph(NULL) {
     setLocation(getCommandNameBeginLoc());
     BlockCommandCommentBits.CommandID = CommandID;
+    BlockCommandCommentBits.CommandMarker = CommandMarker;
   }
 
   static bool classof(const Comment *C) {
@@ -662,9 +685,10 @@ public:
     if (NewLocEnd.isValid())
       setSourceRange(SourceRange(getLocStart(), NewLocEnd));
   }
-  
-  bool getAtCommand() const LLVM_READONLY {
-    return AtCommand;
+
+  CommandMarkerKind getCommandMarker() const LLVM_READONLY {
+    return static_cast<CommandMarkerKind>(
+        BlockCommandCommentBits.CommandMarker);
   }
 };
 
@@ -680,9 +704,9 @@ public:
   ParamCommandComment(SourceLocation LocBegin,
                       SourceLocation LocEnd,
                       unsigned CommandID,
-                      bool AtCommand) :
+                      CommandMarkerKind CommandMarker) :
       BlockCommandComment(ParamCommandCommentKind, LocBegin, LocEnd,
-                          CommandID, AtCommand),
+                          CommandID, CommandMarker),
       ParamIndex(InvalidParamIndex) {
     ParamCommandCommentBits.Direction = In;
     ParamCommandCommentBits.IsDirectionExplicit = false;
@@ -763,9 +787,9 @@ public:
   TParamCommandComment(SourceLocation LocBegin,
                        SourceLocation LocEnd,
                        unsigned CommandID,
-                       bool AtCommand) :
+                       CommandMarkerKind CommandMarker) :
       BlockCommandComment(TParamCommandCommentKind, LocBegin, LocEnd, CommandID,
-                          AtCommand)
+                          CommandMarker)
   { }
 
   static bool classof(const Comment *C) {
@@ -846,7 +870,8 @@ public:
                        SourceLocation LocEnd,
                        unsigned CommandID) :
       BlockCommandComment(VerbatimBlockCommentKind,
-                          LocBegin, LocEnd, CommandID, false)
+                          LocBegin, LocEnd, CommandID,
+                          CMK_At) // FIXME: improve source fidelity.
   { }
 
   static bool classof(const Comment *C) {
@@ -899,7 +924,8 @@ public:
                       StringRef Text) :
       BlockCommandComment(VerbatimLineCommentKind,
                           LocBegin, LocEnd,
-                          CommandID, false),
+                          CommandID,
+                          CMK_At), // FIXME: improve source fidelity.
       Text(Text),
       TextBegin(TextBegin)
   { }

Modified: cfe/trunk/include/clang/AST/CommentLexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/CommentLexer.h?rev=176461&r1=176460&r2=176461&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/CommentLexer.h (original)
+++ cfe/trunk/include/clang/AST/CommentLexer.h Mon Mar  4 17:06:15 2013
@@ -34,9 +34,9 @@ enum TokenKind {
   eof,
   newline,
   text,
-  unknown_command, // Command that does not have an ID.
-  backslash_command,   // \Command with an ID.
-  at_command,          // @command with an ID.
+  unknown_command,   // Command that does not have an ID.
+  backslash_command, // Command with an ID, that used backslash marker.
+  at_command,        // Command with an ID, that used 'at' marker.
   verbatim_block_begin,
   verbatim_block_line,
   verbatim_block_end,

Modified: cfe/trunk/include/clang/AST/CommentSema.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/CommentSema.h?rev=176461&r1=176460&r2=176461&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/CommentSema.h (original)
+++ cfe/trunk/include/clang/AST/CommentSema.h Mon Mar  4 17:06:15 2013
@@ -97,7 +97,7 @@ public:
   BlockCommandComment *actOnBlockCommandStart(SourceLocation LocBegin,
                                               SourceLocation LocEnd,
                                               unsigned CommandID,
-                                              bool AtCommand);
+                                              CommandMarkerKind CommandMarker);
 
   void actOnBlockCommandArgs(BlockCommandComment *Command,
                              ArrayRef<BlockCommandComment::Argument> Args);
@@ -108,7 +108,7 @@ public:
   ParamCommandComment *actOnParamCommandStart(SourceLocation LocBegin,
                                               SourceLocation LocEnd,
                                               unsigned CommandID,
-                                              bool AtCommand);
+                                              CommandMarkerKind CommandMarker);
 
   void actOnParamCommandDirectionArg(ParamCommandComment *Command,
                                      SourceLocation ArgLocBegin,
@@ -126,7 +126,7 @@ public:
   TParamCommandComment *actOnTParamCommandStart(SourceLocation LocBegin,
                                                 SourceLocation LocEnd,
                                                 unsigned CommandID,
-                                                bool AtCommand);
+                                                CommandMarkerKind CommandMarker);
 
   void actOnTParamCommandParamNameArg(TParamCommandComment *Command,
                                       SourceLocation ArgLocBegin,

Modified: cfe/trunk/lib/AST/CommentLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/CommentLexer.cpp?rev=176461&r1=176460&r2=176461&view=diff
==============================================================================
--- cfe/trunk/lib/AST/CommentLexer.cpp (original)
+++ cfe/trunk/lib/AST/CommentLexer.cpp Mon Mar  4 17:06:15 2013
@@ -298,7 +298,11 @@ void Lexer::lexCommentText(Token &T) {
     switch(*TokenPtr) {
       case '\\':
       case '@': {
-        bool AtCommand = (*TokenPtr == '@');
+        // Commands that start with a backslash and commands that start with
+        // 'at' have equivalent semantics.  But we keep information about the
+        // exact syntax in AST for comments.
+        tok::TokenKind CommandKind =
+            (*TokenPtr == '@') ? tok::at_command : tok::backslash_command;
         TokenPtr++;
         if (TokenPtr == CommentEnd) {
           formTextToken(T, TokenPtr);
@@ -359,9 +363,7 @@ void Lexer::lexCommentText(Token &T) {
           setupAndLexVerbatimLine(T, TokenPtr, Info);
           return;
         }
-        formTokenWithChars(T, TokenPtr, 
-                           (AtCommand ? tok::at_command 
-                                      : tok::backslash_command));
+        formTokenWithChars(T, TokenPtr, CommandKind);
         T.setCommandID(Info->getID());
         return;
       }

Modified: cfe/trunk/lib/AST/CommentParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/CommentParser.cpp?rev=176461&r1=176460&r2=176461&view=diff
==============================================================================
--- cfe/trunk/lib/AST/CommentParser.cpp (original)
+++ cfe/trunk/lib/AST/CommentParser.cpp Mon Mar  4 17:06:15 2013
@@ -308,23 +308,25 @@ BlockCommandComment *Parser::parseBlockC
   bool IsParam = false;
   bool IsTParam = false;
   const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
+  CommandMarkerKind CommandMarker =
+      Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
   if (Info->IsParamCommand) {
     IsParam = true;
     PC = S.actOnParamCommandStart(Tok.getLocation(),
                                   Tok.getEndLocation(),
                                   Tok.getCommandID(),
-                                  Tok.is(tok::at_command));
+                                  CommandMarker);
   } else if (Info->IsTParamCommand) {
     IsTParam = true;
     TPC = S.actOnTParamCommandStart(Tok.getLocation(),
                                     Tok.getEndLocation(),
                                     Tok.getCommandID(),
-                                    Tok.is(tok::at_command));
+                                    CommandMarker);
   } else {
     BC = S.actOnBlockCommandStart(Tok.getLocation(),
                                   Tok.getEndLocation(),
                                   Tok.getCommandID(),
-                                  Tok.is(tok::at_command));
+                                  CommandMarker);
   }
   consumeToken();
 

Modified: cfe/trunk/lib/AST/CommentSema.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/CommentSema.cpp?rev=176461&r1=176460&r2=176461&view=diff
==============================================================================
--- cfe/trunk/lib/AST/CommentSema.cpp (original)
+++ cfe/trunk/lib/AST/CommentSema.cpp Mon Mar  4 17:06:15 2013
@@ -47,11 +47,13 @@ ParagraphComment *Sema::actOnParagraphCo
   return new (Allocator) ParagraphComment(Content);
 }
 
-BlockCommandComment *Sema::actOnBlockCommandStart(SourceLocation LocBegin,
-                                                  SourceLocation LocEnd,
-                                                  unsigned CommandID,
-                                                  bool AtCommand) {
-  return new (Allocator) BlockCommandComment(LocBegin, LocEnd, CommandID, AtCommand);
+BlockCommandComment *Sema::actOnBlockCommandStart(
+                                      SourceLocation LocBegin,
+                                      SourceLocation LocEnd,
+                                      unsigned CommandID,
+                                      CommandMarkerKind CommandMarker) {
+  return new (Allocator) BlockCommandComment(LocBegin, LocEnd, CommandID,
+                                             CommandMarker);
 }
 
 void Sema::actOnBlockCommandArgs(BlockCommandComment *Command,
@@ -68,17 +70,19 @@ void Sema::actOnBlockCommandFinish(Block
   checkDeprecatedCommand(Command);
 }
 
-ParamCommandComment *Sema::actOnParamCommandStart(SourceLocation LocBegin,
-                                                  SourceLocation LocEnd,
-                                                  unsigned CommandID,
-                                                  bool AtCommand) {
+ParamCommandComment *Sema::actOnParamCommandStart(
+                                      SourceLocation LocBegin,
+                                      SourceLocation LocEnd,
+                                      unsigned CommandID,
+                                      CommandMarkerKind CommandMarker) {
   ParamCommandComment *Command =
-      new (Allocator) ParamCommandComment(LocBegin, LocEnd, CommandID, AtCommand);
+      new (Allocator) ParamCommandComment(LocBegin, LocEnd, CommandID,
+                                          CommandMarker);
 
   if (!isFunctionDecl())
     Diag(Command->getLocation(),
          diag::warn_doc_param_not_attached_to_a_function_decl)
-      << AtCommand
+      << CommandMarker
       << Command->getCommandNameRange(Traits);
 
   return Command;
@@ -163,17 +167,19 @@ void Sema::actOnParamCommandFinish(Param
   checkBlockCommandEmptyParagraph(Command);
 }
 
-TParamCommandComment *Sema::actOnTParamCommandStart(SourceLocation LocBegin,
-                                                    SourceLocation LocEnd,
-                                                    unsigned CommandID,
-                                                    bool AtCommand) {
+TParamCommandComment *Sema::actOnTParamCommandStart(
+                                      SourceLocation LocBegin,
+                                      SourceLocation LocEnd,
+                                      unsigned CommandID,
+                                      CommandMarkerKind CommandMarker) {
   TParamCommandComment *Command =
-      new (Allocator) TParamCommandComment(LocBegin, LocEnd, CommandID, AtCommand);
+      new (Allocator) TParamCommandComment(LocBegin, LocEnd, CommandID,
+                                           CommandMarker);
 
   if (!isTemplateOrSpecialization())
     Diag(Command->getLocation(),
          diag::warn_doc_tparam_not_attached_to_a_template_decl)
-      << AtCommand
+      << CommandMarker
       << Command->getCommandNameRange(Traits);
 
   return Command;
@@ -437,7 +443,7 @@ void Sema::checkBlockCommandEmptyParagra
     if (!DiagLoc.isValid())
       DiagLoc = Command->getCommandNameRange(Traits).getEnd();
     Diag(DiagLoc, diag::warn_doc_block_command_empty_paragraph)
-      << Command->getAtCommand()
+      << Command->getCommandMarker()
       << Command->getCommandName(Traits)
       << Command->getSourceRange();
   }
@@ -465,7 +471,7 @@ void Sema::checkReturnsCommand(const Blo
       }
       Diag(Command->getLocation(),
            diag::warn_doc_returns_attached_to_a_void_function)
-        << Command->getAtCommand()
+        << Command->getCommandMarker()
         << Command->getCommandName(Traits)
         << DiagKind
         << Command->getSourceRange();
@@ -477,7 +483,7 @@ void Sema::checkReturnsCommand(const Blo
   
   Diag(Command->getLocation(),
        diag::warn_doc_returns_not_attached_to_a_function_decl)
-    << Command->getAtCommand()
+    << Command->getCommandMarker()
     << Command->getCommandName(Traits)
     << Command->getSourceRange();
 }
@@ -510,18 +516,18 @@ void Sema::checkBlockCommandDuplicate(co
   StringRef CommandName = Command->getCommandName(Traits);
   StringRef PrevCommandName = PrevCommand->getCommandName(Traits);
   Diag(Command->getLocation(), diag::warn_doc_block_command_duplicate)
-      << Command->getAtCommand()
+      << Command->getCommandMarker()
       << CommandName
       << Command->getSourceRange();
   if (CommandName == PrevCommandName)
     Diag(PrevCommand->getLocation(), diag::note_doc_block_command_previous)
-        << PrevCommand->getAtCommand()
+        << PrevCommand->getCommandMarker()
         << PrevCommandName
         << PrevCommand->getSourceRange();
   else
     Diag(PrevCommand->getLocation(),
          diag::note_doc_block_command_previous_alias)
-        << PrevCommand->getAtCommand()
+        << PrevCommand->getCommandMarker()
         << PrevCommandName
         << CommandName;
 }

Modified: cfe/trunk/unittests/AST/CommentLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/AST/CommentLexer.cpp?rev=176461&r1=176460&r2=176461&view=diff
==============================================================================
--- cfe/trunk/unittests/AST/CommentLexer.cpp (original)
+++ cfe/trunk/unittests/AST/CommentLexer.cpp Mon Mar  4 17:06:15 2013
@@ -301,8 +301,10 @@ TEST_F(CommentLexerTest, DoxygenCommand3
 
 // Doxygen escape sequences.
 TEST_F(CommentLexerTest, DoxygenCommand4) {
-  const char *Source =
-    "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::";
+  const char *Sources[] = {
+    "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::",
+    "/// @\\ @@ @& @$ @# @< @> @% @\" @. @::"
+  };
   const char *Text[] = {
     " ",
     "\\", " ", "@", " ", "&", " ", "$",  " ", "#", " ",
@@ -310,16 +312,18 @@ TEST_F(CommentLexerTest, DoxygenCommand4
     "::", ""
   };
 
-  std::vector<Token> Toks;
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+    std::vector<Token> Toks;
 
-  lexString(Source, Toks);
+    lexString(Sources[i], Toks);
 
-  ASSERT_EQ(array_lengthof(Text), Toks.size());
+    ASSERT_EQ(array_lengthof(Text), Toks.size());
 
-  for (size_t i = 0, e = Toks.size(); i != e; i++) {
-    if(Toks[i].is(tok::text))
-      ASSERT_EQ(StringRef(Text[i]), Toks[i].getText())
-        << "index " << i;
+    for (size_t j = 0, e = Toks.size(); j != e; j++) {
+      if(Toks[j].is(tok::text))
+        ASSERT_EQ(StringRef(Text[j]), Toks[j].getText())
+          << "index " << i;
+    }
   }
 }
 
@@ -404,6 +408,38 @@ TEST_F(CommentLexerTest, DoxygenCommand7
 }
 
 TEST_F(CommentLexerTest, DoxygenCommand8) {
+  const char *Source = "/// @em at em @em\t at em\n";
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(8U, Toks.size());
+
+  ASSERT_EQ(tok::text,       Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "),  Toks[0].getText());
+
+  ASSERT_EQ(tok::at_command, Toks[1].getKind());
+  ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));
+
+  ASSERT_EQ(tok::at_command, Toks[2].getKind());
+  ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));
+
+  ASSERT_EQ(tok::text,       Toks[3].getKind());
+  ASSERT_EQ(StringRef(" "),  Toks[3].getText());
+
+  ASSERT_EQ(tok::at_command, Toks[4].getKind());
+  ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));
+
+  ASSERT_EQ(tok::text,       Toks[5].getKind());
+  ASSERT_EQ(StringRef("\t"), Toks[5].getText());
+
+  ASSERT_EQ(tok::at_command, Toks[6].getKind());
+  ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));
+
+  ASSERT_EQ(tok::newline,    Toks[7].getKind());
+}
+
+TEST_F(CommentLexerTest, DoxygenCommand9) {
   const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n";
   std::vector<Token> Toks;
 
@@ -435,7 +471,7 @@ TEST_F(CommentLexerTest, DoxygenCommand8
   ASSERT_EQ(tok::newline,     Toks[7].getKind());
 }
 
-TEST_F(CommentLexerTest, DoxygenCommand9) {
+TEST_F(CommentLexerTest, DoxygenCommand10) {
   const char *Source = "// \\c\n";
   std::vector<Token> Toks;
 
@@ -453,7 +489,9 @@ TEST_F(CommentLexerTest, DoxygenCommand9
 }
 
 TEST_F(CommentLexerTest, RegisterCustomBlockCommand) {
-  const char *Source = "/// \\NewBlockCommand Aaa.\n";
+  const char *Source =
+    "/// \\NewBlockCommand Aaa.\n"
+    "/// @NewBlockCommand Aaa.\n";
 
   Traits.registerBlockCommand(StringRef("NewBlockCommand"));
 
@@ -461,10 +499,10 @@ TEST_F(CommentLexerTest, RegisterCustomB
 
   lexString(Source, Toks);
 
-  ASSERT_EQ(4U, Toks.size());
+  ASSERT_EQ(8U, Toks.size());
 
-  ASSERT_EQ(tok::text,      Toks[0].getKind());
-  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+  ASSERT_EQ(tok::text,          Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "),     Toks[0].getText());
 
   ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
   ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[1]));
@@ -472,7 +510,18 @@ TEST_F(CommentLexerTest, RegisterCustomB
   ASSERT_EQ(tok::text,          Toks[2].getKind());
   ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());
 
-  ASSERT_EQ(tok::newline,        Toks[3].getKind());
+  ASSERT_EQ(tok::newline,       Toks[3].getKind());
+
+  ASSERT_EQ(tok::text,          Toks[4].getKind());
+  ASSERT_EQ(StringRef(" "),     Toks[4].getText());
+
+  ASSERT_EQ(tok::at_command,    Toks[5].getKind());
+  ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[5]));
+
+  ASSERT_EQ(tok::text,          Toks[6].getKind());
+  ASSERT_EQ(StringRef(" Aaa."), Toks[6].getText());
+
+  ASSERT_EQ(tok::newline,       Toks[7].getKind());
 }
 
 TEST_F(CommentLexerTest, RegisterMultipleBlockCommands) {





More information about the cfe-commits mailing list