r176461 - Comment parsing: refactor handling of command markers in AST
Dmitri Gribenko
gribozavr at gmail.com
Mon Mar 4 15:06:15 PST 2013
Author: gribozavr
Date: Mon Mar 4 17:06:15 2013
New Revision: 176461
URL: http://llvm.org/viewvc/llvm-project?rev=176461&view=rev
Log:
Comment parsing: refactor handling of command markers in AST
* Use the term 'command marker', because the semantics of 'backslash' and 'at'
commands are the same. (Talking about 'at commands' makes them look like a
special entity.)
* Sink the flag down into bitfields, reducing the size of AST nodes.
* Change the flag into an enum for clarity. Boolean function parameters are
not very clear.
* Add unittests for new tok::at_command tokens.
Modified:
cfe/trunk/include/clang/AST/Comment.h
cfe/trunk/include/clang/AST/CommentLexer.h
cfe/trunk/include/clang/AST/CommentSema.h
cfe/trunk/lib/AST/CommentLexer.cpp
cfe/trunk/lib/AST/CommentParser.cpp
cfe/trunk/lib/AST/CommentSema.cpp
cfe/trunk/unittests/AST/CommentLexer.cpp
Modified: cfe/trunk/include/clang/AST/Comment.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/Comment.h?rev=176461&r1=176460&r2=176461&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/Comment.h (original)
+++ cfe/trunk/include/clang/AST/Comment.h Mon Mar 4 17:06:15 2013
@@ -28,6 +28,26 @@ class TemplateParameterList;
namespace comments {
class FullComment;
+
+/// Describes the syntax that was used in a documentation command.
+///
+/// Exact values of this enumeration are important because they used to select
+/// parts of diagnostic messages. Audit diagnostics before changing or adding
+/// a new value.
+enum CommandMarkerKind {
+ /// Command started with a backslash character:
+ /// \code
+ /// \foo
+ /// \endcode
+ CMK_Backslash = 0,
+
+ /// Command started with an 'at' character:
+ /// \code
+ /// @foo
+ /// \endcode
+ CMK_At = 1
+};
+
/// Any part of the comment.
/// Abstract class.
class Comment {
@@ -110,8 +130,12 @@ protected:
unsigned : NumCommentBits;
unsigned CommandID : 8;
+
+ /// Describes the syntax that was used in a documentation command.
+ /// Contains values from CommandMarkerKind enum.
+ unsigned CommandMarker : 1;
};
- enum { NumBlockCommandCommentBits = NumCommentBits + 8 };
+ enum { NumBlockCommandCommentBits = NumCommentBits + 9 };
class ParamCommandCommentBitfields {
friend class ParamCommandComment;
@@ -570,30 +594,29 @@ protected:
/// Paragraph argument.
ParagraphComment *Paragraph;
-
- /// Header Doc command, if true
- bool AtCommand;
-
+
BlockCommandComment(CommentKind K,
SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
- bool AtCommand) :
+ CommandMarkerKind CommandMarker) :
BlockContentComment(K, LocBegin, LocEnd),
- Paragraph(NULL), AtCommand(AtCommand) {
+ Paragraph(NULL) {
setLocation(getCommandNameBeginLoc());
BlockCommandCommentBits.CommandID = CommandID;
+ BlockCommandCommentBits.CommandMarker = CommandMarker;
}
public:
BlockCommandComment(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
- bool AtCommand) :
+ CommandMarkerKind CommandMarker) :
BlockContentComment(BlockCommandCommentKind, LocBegin, LocEnd),
- Paragraph(NULL), AtCommand(AtCommand) {
+ Paragraph(NULL) {
setLocation(getCommandNameBeginLoc());
BlockCommandCommentBits.CommandID = CommandID;
+ BlockCommandCommentBits.CommandMarker = CommandMarker;
}
static bool classof(const Comment *C) {
@@ -662,9 +685,10 @@ public:
if (NewLocEnd.isValid())
setSourceRange(SourceRange(getLocStart(), NewLocEnd));
}
-
- bool getAtCommand() const LLVM_READONLY {
- return AtCommand;
+
+ CommandMarkerKind getCommandMarker() const LLVM_READONLY {
+ return static_cast<CommandMarkerKind>(
+ BlockCommandCommentBits.CommandMarker);
}
};
@@ -680,9 +704,9 @@ public:
ParamCommandComment(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
- bool AtCommand) :
+ CommandMarkerKind CommandMarker) :
BlockCommandComment(ParamCommandCommentKind, LocBegin, LocEnd,
- CommandID, AtCommand),
+ CommandID, CommandMarker),
ParamIndex(InvalidParamIndex) {
ParamCommandCommentBits.Direction = In;
ParamCommandCommentBits.IsDirectionExplicit = false;
@@ -763,9 +787,9 @@ public:
TParamCommandComment(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
- bool AtCommand) :
+ CommandMarkerKind CommandMarker) :
BlockCommandComment(TParamCommandCommentKind, LocBegin, LocEnd, CommandID,
- AtCommand)
+ CommandMarker)
{ }
static bool classof(const Comment *C) {
@@ -846,7 +870,8 @@ public:
SourceLocation LocEnd,
unsigned CommandID) :
BlockCommandComment(VerbatimBlockCommentKind,
- LocBegin, LocEnd, CommandID, false)
+ LocBegin, LocEnd, CommandID,
+ CMK_At) // FIXME: improve source fidelity.
{ }
static bool classof(const Comment *C) {
@@ -899,7 +924,8 @@ public:
StringRef Text) :
BlockCommandComment(VerbatimLineCommentKind,
LocBegin, LocEnd,
- CommandID, false),
+ CommandID,
+ CMK_At), // FIXME: improve source fidelity.
Text(Text),
TextBegin(TextBegin)
{ }
Modified: cfe/trunk/include/clang/AST/CommentLexer.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/CommentLexer.h?rev=176461&r1=176460&r2=176461&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/CommentLexer.h (original)
+++ cfe/trunk/include/clang/AST/CommentLexer.h Mon Mar 4 17:06:15 2013
@@ -34,9 +34,9 @@ enum TokenKind {
eof,
newline,
text,
- unknown_command, // Command that does not have an ID.
- backslash_command, // \Command with an ID.
- at_command, // @command with an ID.
+ unknown_command, // Command that does not have an ID.
+ backslash_command, // Command with an ID, that used backslash marker.
+ at_command, // Command with an ID, that used 'at' marker.
verbatim_block_begin,
verbatim_block_line,
verbatim_block_end,
Modified: cfe/trunk/include/clang/AST/CommentSema.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/CommentSema.h?rev=176461&r1=176460&r2=176461&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/CommentSema.h (original)
+++ cfe/trunk/include/clang/AST/CommentSema.h Mon Mar 4 17:06:15 2013
@@ -97,7 +97,7 @@ public:
BlockCommandComment *actOnBlockCommandStart(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
- bool AtCommand);
+ CommandMarkerKind CommandMarker);
void actOnBlockCommandArgs(BlockCommandComment *Command,
ArrayRef<BlockCommandComment::Argument> Args);
@@ -108,7 +108,7 @@ public:
ParamCommandComment *actOnParamCommandStart(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
- bool AtCommand);
+ CommandMarkerKind CommandMarker);
void actOnParamCommandDirectionArg(ParamCommandComment *Command,
SourceLocation ArgLocBegin,
@@ -126,7 +126,7 @@ public:
TParamCommandComment *actOnTParamCommandStart(SourceLocation LocBegin,
SourceLocation LocEnd,
unsigned CommandID,
- bool AtCommand);
+ CommandMarkerKind CommandMarker);
void actOnTParamCommandParamNameArg(TParamCommandComment *Command,
SourceLocation ArgLocBegin,
Modified: cfe/trunk/lib/AST/CommentLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/CommentLexer.cpp?rev=176461&r1=176460&r2=176461&view=diff
==============================================================================
--- cfe/trunk/lib/AST/CommentLexer.cpp (original)
+++ cfe/trunk/lib/AST/CommentLexer.cpp Mon Mar 4 17:06:15 2013
@@ -298,7 +298,11 @@ void Lexer::lexCommentText(Token &T) {
switch(*TokenPtr) {
case '\\':
case '@': {
- bool AtCommand = (*TokenPtr == '@');
+ // Commands that start with a backslash and commands that start with
+ // 'at' have equivalent semantics. But we keep information about the
+ // exact syntax in AST for comments.
+ tok::TokenKind CommandKind =
+ (*TokenPtr == '@') ? tok::at_command : tok::backslash_command;
TokenPtr++;
if (TokenPtr == CommentEnd) {
formTextToken(T, TokenPtr);
@@ -359,9 +363,7 @@ void Lexer::lexCommentText(Token &T) {
setupAndLexVerbatimLine(T, TokenPtr, Info);
return;
}
- formTokenWithChars(T, TokenPtr,
- (AtCommand ? tok::at_command
- : tok::backslash_command));
+ formTokenWithChars(T, TokenPtr, CommandKind);
T.setCommandID(Info->getID());
return;
}
Modified: cfe/trunk/lib/AST/CommentParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/CommentParser.cpp?rev=176461&r1=176460&r2=176461&view=diff
==============================================================================
--- cfe/trunk/lib/AST/CommentParser.cpp (original)
+++ cfe/trunk/lib/AST/CommentParser.cpp Mon Mar 4 17:06:15 2013
@@ -308,23 +308,25 @@ BlockCommandComment *Parser::parseBlockC
bool IsParam = false;
bool IsTParam = false;
const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
+ CommandMarkerKind CommandMarker =
+ Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
if (Info->IsParamCommand) {
IsParam = true;
PC = S.actOnParamCommandStart(Tok.getLocation(),
Tok.getEndLocation(),
Tok.getCommandID(),
- Tok.is(tok::at_command));
+ CommandMarker);
} else if (Info->IsTParamCommand) {
IsTParam = true;
TPC = S.actOnTParamCommandStart(Tok.getLocation(),
Tok.getEndLocation(),
Tok.getCommandID(),
- Tok.is(tok::at_command));
+ CommandMarker);
} else {
BC = S.actOnBlockCommandStart(Tok.getLocation(),
Tok.getEndLocation(),
Tok.getCommandID(),
- Tok.is(tok::at_command));
+ CommandMarker);
}
consumeToken();
Modified: cfe/trunk/lib/AST/CommentSema.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/CommentSema.cpp?rev=176461&r1=176460&r2=176461&view=diff
==============================================================================
--- cfe/trunk/lib/AST/CommentSema.cpp (original)
+++ cfe/trunk/lib/AST/CommentSema.cpp Mon Mar 4 17:06:15 2013
@@ -47,11 +47,13 @@ ParagraphComment *Sema::actOnParagraphCo
return new (Allocator) ParagraphComment(Content);
}
-BlockCommandComment *Sema::actOnBlockCommandStart(SourceLocation LocBegin,
- SourceLocation LocEnd,
- unsigned CommandID,
- bool AtCommand) {
- return new (Allocator) BlockCommandComment(LocBegin, LocEnd, CommandID, AtCommand);
+BlockCommandComment *Sema::actOnBlockCommandStart(
+ SourceLocation LocBegin,
+ SourceLocation LocEnd,
+ unsigned CommandID,
+ CommandMarkerKind CommandMarker) {
+ return new (Allocator) BlockCommandComment(LocBegin, LocEnd, CommandID,
+ CommandMarker);
}
void Sema::actOnBlockCommandArgs(BlockCommandComment *Command,
@@ -68,17 +70,19 @@ void Sema::actOnBlockCommandFinish(Block
checkDeprecatedCommand(Command);
}
-ParamCommandComment *Sema::actOnParamCommandStart(SourceLocation LocBegin,
- SourceLocation LocEnd,
- unsigned CommandID,
- bool AtCommand) {
+ParamCommandComment *Sema::actOnParamCommandStart(
+ SourceLocation LocBegin,
+ SourceLocation LocEnd,
+ unsigned CommandID,
+ CommandMarkerKind CommandMarker) {
ParamCommandComment *Command =
- new (Allocator) ParamCommandComment(LocBegin, LocEnd, CommandID, AtCommand);
+ new (Allocator) ParamCommandComment(LocBegin, LocEnd, CommandID,
+ CommandMarker);
if (!isFunctionDecl())
Diag(Command->getLocation(),
diag::warn_doc_param_not_attached_to_a_function_decl)
- << AtCommand
+ << CommandMarker
<< Command->getCommandNameRange(Traits);
return Command;
@@ -163,17 +167,19 @@ void Sema::actOnParamCommandFinish(Param
checkBlockCommandEmptyParagraph(Command);
}
-TParamCommandComment *Sema::actOnTParamCommandStart(SourceLocation LocBegin,
- SourceLocation LocEnd,
- unsigned CommandID,
- bool AtCommand) {
+TParamCommandComment *Sema::actOnTParamCommandStart(
+ SourceLocation LocBegin,
+ SourceLocation LocEnd,
+ unsigned CommandID,
+ CommandMarkerKind CommandMarker) {
TParamCommandComment *Command =
- new (Allocator) TParamCommandComment(LocBegin, LocEnd, CommandID, AtCommand);
+ new (Allocator) TParamCommandComment(LocBegin, LocEnd, CommandID,
+ CommandMarker);
if (!isTemplateOrSpecialization())
Diag(Command->getLocation(),
diag::warn_doc_tparam_not_attached_to_a_template_decl)
- << AtCommand
+ << CommandMarker
<< Command->getCommandNameRange(Traits);
return Command;
@@ -437,7 +443,7 @@ void Sema::checkBlockCommandEmptyParagra
if (!DiagLoc.isValid())
DiagLoc = Command->getCommandNameRange(Traits).getEnd();
Diag(DiagLoc, diag::warn_doc_block_command_empty_paragraph)
- << Command->getAtCommand()
+ << Command->getCommandMarker()
<< Command->getCommandName(Traits)
<< Command->getSourceRange();
}
@@ -465,7 +471,7 @@ void Sema::checkReturnsCommand(const Blo
}
Diag(Command->getLocation(),
diag::warn_doc_returns_attached_to_a_void_function)
- << Command->getAtCommand()
+ << Command->getCommandMarker()
<< Command->getCommandName(Traits)
<< DiagKind
<< Command->getSourceRange();
@@ -477,7 +483,7 @@ void Sema::checkReturnsCommand(const Blo
Diag(Command->getLocation(),
diag::warn_doc_returns_not_attached_to_a_function_decl)
- << Command->getAtCommand()
+ << Command->getCommandMarker()
<< Command->getCommandName(Traits)
<< Command->getSourceRange();
}
@@ -510,18 +516,18 @@ void Sema::checkBlockCommandDuplicate(co
StringRef CommandName = Command->getCommandName(Traits);
StringRef PrevCommandName = PrevCommand->getCommandName(Traits);
Diag(Command->getLocation(), diag::warn_doc_block_command_duplicate)
- << Command->getAtCommand()
+ << Command->getCommandMarker()
<< CommandName
<< Command->getSourceRange();
if (CommandName == PrevCommandName)
Diag(PrevCommand->getLocation(), diag::note_doc_block_command_previous)
- << PrevCommand->getAtCommand()
+ << PrevCommand->getCommandMarker()
<< PrevCommandName
<< PrevCommand->getSourceRange();
else
Diag(PrevCommand->getLocation(),
diag::note_doc_block_command_previous_alias)
- << PrevCommand->getAtCommand()
+ << PrevCommand->getCommandMarker()
<< PrevCommandName
<< CommandName;
}
Modified: cfe/trunk/unittests/AST/CommentLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/AST/CommentLexer.cpp?rev=176461&r1=176460&r2=176461&view=diff
==============================================================================
--- cfe/trunk/unittests/AST/CommentLexer.cpp (original)
+++ cfe/trunk/unittests/AST/CommentLexer.cpp Mon Mar 4 17:06:15 2013
@@ -301,8 +301,10 @@ TEST_F(CommentLexerTest, DoxygenCommand3
// Doxygen escape sequences.
TEST_F(CommentLexerTest, DoxygenCommand4) {
- const char *Source =
- "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::";
+ const char *Sources[] = {
+ "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::",
+ "/// @\\ @@ @& @$ @# @< @> @% @\" @. @::"
+ };
const char *Text[] = {
" ",
"\\", " ", "@", " ", "&", " ", "$", " ", "#", " ",
@@ -310,16 +312,18 @@ TEST_F(CommentLexerTest, DoxygenCommand4
"::", ""
};
- std::vector<Token> Toks;
+ for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+ std::vector<Token> Toks;
- lexString(Source, Toks);
+ lexString(Sources[i], Toks);
- ASSERT_EQ(array_lengthof(Text), Toks.size());
+ ASSERT_EQ(array_lengthof(Text), Toks.size());
- for (size_t i = 0, e = Toks.size(); i != e; i++) {
- if(Toks[i].is(tok::text))
- ASSERT_EQ(StringRef(Text[i]), Toks[i].getText())
- << "index " << i;
+ for (size_t j = 0, e = Toks.size(); j != e; j++) {
+ if(Toks[j].is(tok::text))
+ ASSERT_EQ(StringRef(Text[j]), Toks[j].getText())
+ << "index " << i;
+ }
}
}
@@ -404,6 +408,38 @@ TEST_F(CommentLexerTest, DoxygenCommand7
}
TEST_F(CommentLexerTest, DoxygenCommand8) {
+ const char *Source = "/// @em at em @em\t at em\n";
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(8U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::at_command, Toks[1].getKind());
+ ASSERT_EQ(StringRef("em"), getCommandName(Toks[1]));
+
+ ASSERT_EQ(tok::at_command, Toks[2].getKind());
+ ASSERT_EQ(StringRef("em"), getCommandName(Toks[2]));
+
+ ASSERT_EQ(tok::text, Toks[3].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[3].getText());
+
+ ASSERT_EQ(tok::at_command, Toks[4].getKind());
+ ASSERT_EQ(StringRef("em"), getCommandName(Toks[4]));
+
+ ASSERT_EQ(tok::text, Toks[5].getKind());
+ ASSERT_EQ(StringRef("\t"), Toks[5].getText());
+
+ ASSERT_EQ(tok::at_command, Toks[6].getKind());
+ ASSERT_EQ(StringRef("em"), getCommandName(Toks[6]));
+
+ ASSERT_EQ(tok::newline, Toks[7].getKind());
+}
+
+TEST_F(CommentLexerTest, DoxygenCommand9) {
const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n";
std::vector<Token> Toks;
@@ -435,7 +471,7 @@ TEST_F(CommentLexerTest, DoxygenCommand8
ASSERT_EQ(tok::newline, Toks[7].getKind());
}
-TEST_F(CommentLexerTest, DoxygenCommand9) {
+TEST_F(CommentLexerTest, DoxygenCommand10) {
const char *Source = "// \\c\n";
std::vector<Token> Toks;
@@ -453,7 +489,9 @@ TEST_F(CommentLexerTest, DoxygenCommand9
}
TEST_F(CommentLexerTest, RegisterCustomBlockCommand) {
- const char *Source = "/// \\NewBlockCommand Aaa.\n";
+ const char *Source =
+ "/// \\NewBlockCommand Aaa.\n"
+ "/// @NewBlockCommand Aaa.\n";
Traits.registerBlockCommand(StringRef("NewBlockCommand"));
@@ -461,10 +499,10 @@ TEST_F(CommentLexerTest, RegisterCustomB
lexString(Source, Toks);
- ASSERT_EQ(4U, Toks.size());
+ ASSERT_EQ(8U, Toks.size());
- ASSERT_EQ(tok::text, Toks[0].getKind());
- ASSERT_EQ(StringRef(" "), Toks[0].getText());
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::backslash_command, Toks[1].getKind());
ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[1]));
@@ -472,7 +510,18 @@ TEST_F(CommentLexerTest, RegisterCustomB
ASSERT_EQ(tok::text, Toks[2].getKind());
ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());
- ASSERT_EQ(tok::newline, Toks[3].getKind());
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
+
+ ASSERT_EQ(tok::text, Toks[4].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[4].getText());
+
+ ASSERT_EQ(tok::at_command, Toks[5].getKind());
+ ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[5]));
+
+ ASSERT_EQ(tok::text, Toks[6].getKind());
+ ASSERT_EQ(StringRef(" Aaa."), Toks[6].getText());
+
+ ASSERT_EQ(tok::newline, Toks[7].getKind());
}
TEST_F(CommentLexerTest, RegisterMultipleBlockCommands) {
More information about the cfe-commits
mailing list