[cfe-commits] r159223 - in /cfe/trunk: include/clang-c/ include/clang/AST/ lib/AST/ test/Index/ tools/c-index-test/ tools/libclang/ unittests/ unittests/AST/
David Blaikie
dblaikie at gmail.com
Tue Jun 26 16:04:30 PDT 2012
I'm not sure if this is showing up on buildbots, but I'm seeing a
failure presumably caused by this change:
Exit Code: 1
Command Output (stderr):
--
src/tools/clang/test/Index/annotate-comments.cpp:262:11: error:
expected string not found in input
// CHECK: annotate-comments.cpp:185:6: FunctionDecl=isdoxy44:{{.*}}
BriefComment=[ IS_DOXYGEN_START Aaa bbb\n ccc.\n]
^
<stdin>:60:263: note: scanning from here
// CHECK: annotate-comments.cpp:176:6: FunctionDecl=isdoxy43:176:6
RawComment=[/// IS_DOXYGEN_START\n/// It is fine to have a command at
the end of comment.\n///\brief\n///\n/// Some malformed command.\n/*
\*/\n/**\n * \brief Aaa aaaaaaa aaaa.\n * IS_DOXYGEN_END\n */]
RawCommentRange=[166:1 - 175:4] Extent=[176:1 - 176:20]
^
<stdin>:61:11: note: possible intended match here
// CHECK: annotate-comments.cpp:185:6: FunctionDecl=isdoxy44:185:6
RawComment=[/// IS_DOXYGEN_START Aaa bbb\n/// ccc.\n///\n/// Ddd
eee.\n/// Fff.\n///\n/// Ggg. IS_DOXYGEN_END] RawCommentRange=[178:1 -
184:24] Extent=[185:1 - 185:20]
^
--
Let me know if you need more information to investigate/reproduce this.
- David
On Tue, Jun 26, 2012 at 1:39 PM, Dmitri Gribenko <gribozavr at gmail.com> wrote:
> Author: gribozavr
> Date: Tue Jun 26 15:39:18 2012
> New Revision: 159223
>
> URL: http://llvm.org/viewvc/llvm-project?rev=159223&view=rev
> Log:
> Implement a lexer for structured comments.
>
> Added:
> cfe/trunk/include/clang/AST/CommentBriefParser.h
> cfe/trunk/include/clang/AST/CommentLexer.h
> cfe/trunk/lib/AST/CommentBriefParser.cpp
> cfe/trunk/lib/AST/CommentLexer.cpp
> cfe/trunk/unittests/AST/
> cfe/trunk/unittests/AST/CMakeLists.txt
> cfe/trunk/unittests/AST/CommentLexer.cpp
> cfe/trunk/unittests/AST/Makefile
> Modified:
> cfe/trunk/include/clang-c/Index.h
> cfe/trunk/include/clang/AST/RawCommentList.h
> cfe/trunk/lib/AST/ASTContext.cpp
> cfe/trunk/lib/AST/CMakeLists.txt
> cfe/trunk/lib/AST/RawCommentList.cpp
> cfe/trunk/test/Index/annotate-comments.cpp
> cfe/trunk/tools/c-index-test/c-index-test.c
> cfe/trunk/tools/libclang/CIndex.cpp
> cfe/trunk/tools/libclang/libclang.exports
> cfe/trunk/unittests/Makefile
>
> Modified: cfe/trunk/include/clang-c/Index.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang-c/Index.h?rev=159223&r1=159222&r2=159223&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang-c/Index.h (original)
> +++ cfe/trunk/include/clang-c/Index.h Tue Jun 26 15:39:18 2012
> @@ -3201,6 +3201,12 @@
> CINDEX_LINKAGE CXString clang_Cursor_getRawCommentText(CXCursor C);
>
> /**
> + * \brief Given a cursor that represents a declaration, return the associated
> + * \\brief paragraph; otherwise return the first paragraph.
> + */
> +CINDEX_LINKAGE CXString clang_Cursor_getBriefCommentText(CXCursor C);
> +
> +/**
> * @}
> */
>
>
> Added: cfe/trunk/include/clang/AST/CommentBriefParser.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/CommentBriefParser.h?rev=159223&view=auto
> ==============================================================================
> --- cfe/trunk/include/clang/AST/CommentBriefParser.h (added)
> +++ cfe/trunk/include/clang/AST/CommentBriefParser.h Tue Jun 26 15:39:18 2012
> @@ -0,0 +1,49 @@
> +//===--- CommentBriefParser.h - Dumb comment parser -------------*- C++ -*-===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines a very simple Doxygen comment parser.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +
> +#ifndef LLVM_CLANG_AST_BRIEF_COMMENT_PARSER_H
> +#define LLVM_CLANG_AST_BRIEF_COMMENT_PARSER_H
> +
> +#include "clang/AST/CommentLexer.h"
> +
> +namespace clang {
> +namespace comments {
> +
> +/// A very simple comment parser that extracts just the brief description or
> +/// first paragraph.
> +class BriefParser {
> + Lexer &L;
> +
> + /// Current lookahead token.
> + Token Tok;
> +
> + SourceLocation ConsumeToken() {
> + SourceLocation Loc = Tok.getLocation();
> + L.lex(Tok);
> + return Loc;
> + }
> +
> +public:
> + BriefParser(Lexer &L);
> +
> + /// Return \\brief paragraph, if it exists; otherwise return the first
> + /// paragraph.
> + std::string Parse();
> +};
> +
> +} // end namespace comments
> +} // end namespace clang
> +
> +#endif
> +
>
> Added: cfe/trunk/include/clang/AST/CommentLexer.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/CommentLexer.h?rev=159223&view=auto
> ==============================================================================
> --- cfe/trunk/include/clang/AST/CommentLexer.h (added)
> +++ cfe/trunk/include/clang/AST/CommentLexer.h Tue Jun 26 15:39:18 2012
> @@ -0,0 +1,352 @@
> +//===--- CommentLexer.h - Lexer for structured comments ---------*- C++ -*-===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines lexer for structured comments and supporting token class.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef LLVM_CLANG_AST_COMMENT_LEXER_H
> +#define LLVM_CLANG_AST_COMMENT_LEXER_H
> +
> +#include "clang/Basic/SourceManager.h"
> +#include "llvm/ADT/StringRef.h"
> +#include "llvm/ADT/SmallString.h"
> +#include "llvm/ADT/SmallVector.h"
> +#include "llvm/Support/raw_ostream.h"
> +
> +namespace clang {
> +namespace comments {
> +
> +class Lexer;
> +
> +namespace tok {
> +enum TokenKind {
> + eof,
> + newline,
> + text,
> + command,
> + verbatim_block_begin,
> + verbatim_block_line,
> + verbatim_block_end,
> + verbatim_line,
> + html_tag_open, // <tag
> + html_ident, // attr
> + html_equals, // =
> + html_quoted_string, // "blah\"blah" or 'blah\'blah'
> + html_greater, // >
> + html_tag_close, // </tag>
> +
> + // Markdown tokens (not supported yet).
> + ruler,
> + md_code_line, // Line indented at least by 4 spaces.
> + md_code_inline, // `code`
> + md_emph, // _text_ or *text*
> + md_strong, // __text__ or **text**
> + md_header // ### level 3 header ###
> +};
> +} // end namespace tok
> +
> +class CommentOptions {
> +public:
> + bool Markdown;
> +};
> +
> +/// \brief Comment token.
> +class Token {
> + friend class Lexer;
> +
> + /// The location of the token.
> + SourceLocation Loc;
> +
> + /// The actual kind of the token.
> + tok::TokenKind Kind;
> +
> + /// Length of the token spelling in comment. Can be 0 for synthesized
> + /// tokens.
> + unsigned Length;
> +
> + /// Contains text value associated with a token.
> + const char *TextPtr1;
> + unsigned TextLen1;
> +
> + /// Contains text value associated with a token.
> + const char *TextPtr2;
> + unsigned TextLen2;
> +
> +public:
> + SourceLocation getLocation() const LLVM_READONLY { return Loc; }
> + void setLocation(SourceLocation SL) { Loc = SL; }
> +
> + tok::TokenKind getKind() const LLVM_READONLY { return Kind; }
> + void setKind(tok::TokenKind K) { Kind = K; }
> +
> + bool is(tok::TokenKind K) const LLVM_READONLY { return Kind == K; }
> + bool isNot(tok::TokenKind K) const LLVM_READONLY { return Kind != K; }
> +
> + unsigned getLength() const LLVM_READONLY { return Length; }
> + void setLength(unsigned L) { Length = L; }
> +
> + StringRef getText() const LLVM_READONLY {
> + assert(is(tok::text));
> + return StringRef(TextPtr1, TextLen1);
> + }
> +
> + void setText(StringRef Text) {
> + assert(is(tok::text));
> + TextPtr1 = Text.data();
> + TextLen1 = Text.size();
> + }
> +
> + StringRef getCommandName() const LLVM_READONLY {
> + assert(is(tok::command));
> + return StringRef(TextPtr1, TextLen1);
> + }
> +
> + void setCommandName(StringRef Name) {
> + assert(is(tok::command));
> + TextPtr1 = Name.data();
> + TextLen1 = Name.size();
> + }
> +
> + StringRef getVerbatimBlockName() const LLVM_READONLY {
> + assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end));
> + return StringRef(TextPtr1, TextLen1);
> + }
> +
> + void setVerbatimBlockName(StringRef Name) {
> + assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end));
> + TextPtr1 = Name.data();
> + TextLen1 = Name.size();
> + }
> +
> + StringRef getVerbatimBlockText() const LLVM_READONLY {
> + assert(is(tok::verbatim_block_line));
> + return StringRef(TextPtr1, TextLen1);
> + }
> +
> + void setVerbatimBlockText(StringRef Text) {
> + assert(is(tok::verbatim_block_line));
> + TextPtr1 = Text.data();
> + TextLen1 = Text.size();
> + }
> +
> + /// Returns the name of verbatim line command.
> + StringRef getVerbatimLineName() const LLVM_READONLY {
> + assert(is(tok::verbatim_line));
> + return StringRef(TextPtr1, TextLen1);
> + }
> +
> + void setVerbatimLineName(StringRef Name) {
> + assert(is(tok::verbatim_line));
> + TextPtr1 = Name.data();
> + TextLen1 = Name.size();
> + }
> +
> + StringRef getVerbatimLineText() const LLVM_READONLY {
> + assert(is(tok::verbatim_line));
> + return StringRef(TextPtr2, TextLen2);
> + }
> +
> + void setVerbatimLineText(StringRef Text) {
> + assert(is(tok::verbatim_line));
> + TextPtr2 = Text.data();
> + TextLen2 = Text.size();
> + }
> +
> + StringRef getHTMLTagOpenName() const LLVM_READONLY {
> + assert(is(tok::html_tag_open));
> + return StringRef(TextPtr1, TextLen1);
> + }
> +
> + void setHTMLTagOpenName(StringRef Name) {
> + assert(is(tok::html_tag_open));
> + TextPtr1 = Name.data();
> + TextLen1 = Name.size();
> + }
> +
> + StringRef getHTMLIdent() const LLVM_READONLY {
> + assert(is(tok::html_ident));
> + return StringRef(TextPtr1, TextLen1);
> + }
> +
> + void setHTMLIdent(StringRef Name) {
> + assert(is(tok::html_ident));
> + TextPtr1 = Name.data();
> + TextLen1 = Name.size();
> + }
> +
> + StringRef getHTMLQuotedString() const LLVM_READONLY {
> + assert(is(tok::html_quoted_string));
> + return StringRef(TextPtr1, TextLen1);
> + }
> +
> + void setHTMLQuotedString(StringRef Str) {
> + assert(is(tok::html_quoted_string));
> + TextPtr1 = Str.data();
> + TextLen1 = Str.size();
> + }
> +
> + StringRef getHTMLTagCloseName() const LLVM_READONLY {
> + assert(is(tok::html_tag_close));
> + return StringRef(TextPtr1, TextLen1);
> + }
> +
> + void setHTMLTagCloseName(StringRef Name) {
> + assert(is(tok::html_tag_close));
> + TextPtr1 = Name.data();
> + TextLen1 = Name.size();
> + }
> +
> + void dump(const Lexer &L, const SourceManager &SM) const;
> +};
> +
> +/// \brief Comment lexer.
> +class Lexer {
> +private:
> + Lexer(const Lexer&); // DO NOT IMPLEMENT
> + void operator=(const Lexer&); // DO NOT IMPLEMENT
> +
> + const char *const BufferStart;
> + const char *const BufferEnd;
> + SourceLocation FileLoc;
> + CommentOptions CommOpts;
> +
> + const char *BufferPtr;
> +
> + /// One past end pointer for the current comment. For BCPL comments points
> + /// to newline or BufferEnd, for C comments points to star in '*/'.
> + const char *CommentEnd;
> +
> + enum LexerCommentState {
> + LCS_BeforeComment,
> + LCS_InsideBCPLComment,
> + LCS_InsideCComment,
> + LCS_BetweenComments
> + };
> +
> + /// Low-level lexer state, track if we are inside or outside of comment.
> + LexerCommentState CommentState;
> +
> + enum LexerState {
> + /// Lexing normal comment text
> + LS_Normal,
> +
> + /// Finished lexing verbatim block beginning command, will lex first body
> + /// line.
> + LS_VerbatimBlockFirstLine,
> +
> + /// Lexing verbatim block body line-by-line, skipping line-starting
> + /// decorations.
> + LS_VerbatimBlockBody,
> +
> + /// Finished lexing \verbatim <TAG \endverbatim part, lexing tag attributes.
> + LS_HTMLOpenTag
> + };
> +
> + /// Current lexing mode.
> + LexerState State;
> +
> + /// A verbatim-like block command eats every character (except line starting
> + /// decorations) until matching end command is seen or comment end is hit.
> + struct VerbatimBlockCommand {
> + StringRef BeginName;
> + StringRef EndName;
> + };
> +
> + typedef SmallVector<VerbatimBlockCommand, 4> VerbatimBlockCommandVector;
> +
> + /// Registered verbatim-like block commands.
> + VerbatimBlockCommandVector VerbatimBlockCommands;
> +
> + /// If State is LS_VerbatimBlockFirstLine or LS_VerbatimBlockBody, contains
> + /// the name of the verbatim end command, including command marker.
> + SmallString<16> VerbatimBlockEndCommandName;
> +
> + bool isVerbatimBlockCommand(StringRef BeginName, StringRef &EndName) const;
> +
> + /// A verbatim-like line command eats everything until a newline is seen or
> + /// comment end is hit.
> + struct VerbatimLineCommand {
> + StringRef Name;
> + };
> +
> + typedef SmallVector<VerbatimLineCommand, 4> VerbatimLineCommandVector;
> +
> + /// Registered verbatim-like line commands.
> + VerbatimLineCommandVector VerbatimLineCommands;
> +
> + bool isVerbatimLineCommand(StringRef Name) const;
> +
> + void formTokenWithChars(Token &Result, const char *TokEnd,
> + tok::TokenKind Kind) {
> + const unsigned TokLen = TokEnd - BufferPtr;
> + Result.setLocation(getSourceLocation(BufferPtr));
> + Result.setKind(Kind);
> + Result.setLength(TokLen);
> +#ifndef NDEBUG
> + Result.TextPtr1 = "<UNSET>";
> + Result.TextLen1 = 7;
> + Result.TextPtr2 = "<UNSET>";
> + Result.TextLen2 = 7;
> +#endif
> + BufferPtr = TokEnd;
> + }
> +
> + SourceLocation getSourceLocation(const char *Loc) const {
> + assert(Loc >= BufferStart && Loc <= BufferEnd &&
> + "Location out of range for this buffer!");
> +
> + const unsigned CharNo = Loc - BufferStart;
> + return FileLoc.getLocWithOffset(CharNo);
> + }
> +
> + /// Eat string matching regexp \code \s*\* \endcode.
> + void skipLineStartingDecorations();
> +
> + /// Lex stuff inside comments. CommentEnd should be set correctly.
> + void lexCommentText(Token &T);
> +
> + void setupAndLexVerbatimBlock(Token &T,
> + const char *TextBegin,
> + char Marker, StringRef EndName);
> +
> + void lexVerbatimBlockFirstLine(Token &T);
> +
> + void lexVerbatimBlockBody(Token &T);
> +
> + void lexVerbatimLine(Token &T, const char *TextBegin);
> +
> + void setupAndLexHTMLOpenTag(Token &T);
> +
> + void lexHTMLOpenTag(Token &T);
> +
> + void lexHTMLCloseTag(Token &T);
> +
> +public:
> + Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts,
> + const char *BufferStart, const char *BufferEnd);
> +
> + void lex(Token &T);
> +
> + StringRef getSpelling(const Token &Tok,
> + const SourceManager &SourceMgr,
> + bool *Invalid = NULL) const;
> +
> + /// \brief Register a new verbatim block command.
> + void addVerbatimBlockCommand(StringRef BeginName, StringRef EndName);
> +
> + /// \brief Register a new verbatim line command.
> + void addVerbatimLineCommand(StringRef Name);
> +};
> +
> +} // end namespace comments
> +} // end namespace clang
> +
> +#endif
> +
>
> Modified: cfe/trunk/include/clang/AST/RawCommentList.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/RawCommentList.h?rev=159223&r1=159222&r2=159223&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/AST/RawCommentList.h (original)
> +++ cfe/trunk/include/clang/AST/RawCommentList.h Tue Jun 26 15:39:18 2012
> @@ -15,6 +15,7 @@
>
> namespace clang {
>
> +class ASTContext;
> class ASTReader;
>
> class RawComment {
> @@ -27,7 +28,7 @@
> CK_BCPLExcl, ///< \code //! stuff \endcode
> CK_JavaDoc, ///< \code /** stuff */ \endcode
> CK_Qt, ///< \code /*! stuff */ \endcode, also used by HeaderDoc
> - CK_Merged ///< Two or more Doxygen comments merged together
> + CK_Merged ///< Two or more documentation comments merged together
> };
>
> RawComment() : Kind(CK_Invalid), IsAlmostTrailingComment(false) { }
> @@ -53,7 +54,7 @@
> /// \code /**< stuff */ \endcode
> /// \code /*!< stuff */ \endcode
> bool isTrailingComment() const LLVM_READONLY {
> - assert(isDoxygen());
> + assert(isDocumentation());
> return IsTrailingComment;
> }
>
> @@ -64,13 +65,13 @@
> return IsAlmostTrailingComment;
> }
>
> - /// Returns true if this comment is not a Doxygen comment.
> + /// Returns true if this comment is not a documentation comment.
> bool isOrdinary() const LLVM_READONLY {
> return (Kind == CK_OrdinaryBCPL) || (Kind == CK_OrdinaryC);
> }
>
> - /// Returns true if this comment any kind of a Doxygen comment.
> - bool isDoxygen() const LLVM_READONLY {
> + /// Returns true if this comment is any kind of a documentation comment.
> + bool isDocumentation() const LLVM_READONLY {
> return !isInvalid() && !isOrdinary();
> }
>
> @@ -91,11 +92,21 @@
> unsigned getBeginLine(const SourceManager &SM) const;
> unsigned getEndLine(const SourceManager &SM) const;
>
> + StringRef getBriefText(const ASTContext &Context) const {
> + if (BriefTextValid)
> + return BriefText;
> +
> + return extractBriefText(Context);
> + }
> +
> private:
> SourceRange Range;
>
> mutable StringRef RawText;
> - mutable bool RawTextValid : 1; ///< True if RawText is valid
> + mutable StringRef BriefText;
> +
> + mutable bool RawTextValid : 1; ///< True if RawText is valid
> + mutable bool BriefTextValid : 1; ///< True if BriefText is valid
>
> unsigned Kind : 3;
>
> @@ -118,6 +129,8 @@
>
> StringRef getRawTextSlow(const SourceManager &SourceMgr) const;
>
> + StringRef extractBriefText(const ASTContext &Context) const;
> +
> friend class ASTReader;
> };
>
>
> Modified: cfe/trunk/lib/AST/ASTContext.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ASTContext.cpp?rev=159223&r1=159222&r2=159223&view=diff
> ==============================================================================
> --- cfe/trunk/lib/AST/ASTContext.cpp (original)
> +++ cfe/trunk/lib/AST/ASTContext.cpp Tue Jun 26 15:39:18 2012
> @@ -90,7 +90,7 @@
>
> // First check whether we have a trailing comment.
> if (Comment != RawComments.end() &&
> - Comment->isDoxygen() && Comment->isTrailingComment() &&
> + Comment->isDocumentation() && Comment->isTrailingComment() &&
> !isa<TagDecl>(D) && !isa<NamespaceDecl>(D)) {
> std::pair<FileID, unsigned> CommentBeginDecomp
> = SourceMgr.getDecomposedLoc(Comment->getSourceRange().getBegin());
> @@ -111,7 +111,7 @@
> --Comment;
>
> // Check that we actually have a non-member Doxygen comment.
> - if (!Comment->isDoxygen() || Comment->isTrailingComment())
> + if (!Comment->isDocumentation() || Comment->isTrailingComment())
> return NULL;
>
> // Decompose the end of the comment.
>
> Modified: cfe/trunk/lib/AST/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/CMakeLists.txt?rev=159223&r1=159222&r2=159223&view=diff
> ==============================================================================
> --- cfe/trunk/lib/AST/CMakeLists.txt (original)
> +++ cfe/trunk/lib/AST/CMakeLists.txt Tue Jun 26 15:39:18 2012
> @@ -8,6 +8,8 @@
> ASTImporter.cpp
> AttrImpl.cpp
> CXXInheritance.cpp
> + CommentBriefParser.cpp
> + CommentLexer.cpp
> Decl.cpp
> DeclarationName.cpp
> DeclBase.cpp
>
> Added: cfe/trunk/lib/AST/CommentBriefParser.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/CommentBriefParser.cpp?rev=159223&view=auto
> ==============================================================================
> --- cfe/trunk/lib/AST/CommentBriefParser.cpp (added)
> +++ cfe/trunk/lib/AST/CommentBriefParser.cpp Tue Jun 26 15:39:18 2012
> @@ -0,0 +1,76 @@
> +//===--- CommentBriefParser.cpp - Dumb comment parser ---------------------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "clang/AST/CommentBriefParser.h"
> +
> +namespace clang {
> +namespace comments {
> +
> +std::string BriefParser::Parse() {
> + std::string FirstParagraph;
> + std::string Brief;
> + bool InFirstParagraph = true;
> + bool InBrief = false;
> + bool BriefDone = false;
> +
> + while (Tok.isNot(tok::eof)) {
> + if (Tok.is(tok::text)) {
> + if (InFirstParagraph)
> + FirstParagraph += Tok.getText();
> + if (InBrief)
> + Brief += Tok.getText();
> + ConsumeToken();
> + continue;
> + }
> +
> + if (!BriefDone && Tok.is(tok::command) && Tok.getCommandName() == "brief") {
> + InBrief = true;
> + ConsumeToken();
> + continue;
> + }
> +
> + if (Tok.is(tok::newline)) {
> + if (InFirstParagraph)
> + FirstParagraph += '\n';
> + if (InBrief)
> + Brief += '\n';
> + ConsumeToken();
> +
> + if (Tok.is(tok::newline)) {
> + ConsumeToken();
> + // We found a paragraph end.
> + InFirstParagraph = false;
> + if (InBrief) {
> + InBrief = false;
> + BriefDone = true;
> + }
> + }
> + continue;
> + }
> +
> + // We didn't handle this token, so just drop it.
> + ConsumeToken();
> + }
> +
> + if (Brief.size() > 0)
> + return Brief;
> +
> + return FirstParagraph;
> +}
> +
> +BriefParser::BriefParser(Lexer &L) : L(L)
> +{
> + // Get lookahead token.
> + ConsumeToken();
> +}
> +
> +} // end namespace comments
> +} // end namespace clang
> +
> +
>
> Added: cfe/trunk/lib/AST/CommentLexer.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/CommentLexer.cpp?rev=159223&view=auto
> ==============================================================================
> --- cfe/trunk/lib/AST/CommentLexer.cpp (added)
> +++ cfe/trunk/lib/AST/CommentLexer.cpp Tue Jun 26 15:39:18 2012
> @@ -0,0 +1,676 @@
> +#include "clang/AST/CommentLexer.h"
> +#include "llvm/ADT/StringSwitch.h"
> +#include "llvm/Support/ErrorHandling.h"
> +
> +namespace clang {
> +namespace comments {
> +
> +void Token::dump(const Lexer &L, const SourceManager &SM) const {
> + llvm::errs() << "comments::Token Kind=" << Kind << " ";
> + Loc.dump(SM);
> + llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
> +}
> +
> +bool Lexer::isVerbatimBlockCommand(StringRef BeginName,
> + StringRef &EndName) const {
> + const char *Result = llvm::StringSwitch<const char *>(BeginName)
> + .Case("code", "endcode")
> + .Case("verbatim", "endverbatim")
> + .Case("htmlonly", "endhtmlonly")
> + .Case("latexonly", "endlatexonly")
> + .Case("xmlonly", "endxmlonly")
> + .Case("manonly", "endmanonly")
> + .Case("rtfonly", "endrtfonly")
> +
> + .Case("dot", "enddot")
> + .Case("msc", "endmsc")
> +
> + .Case("f$", "f$") // Inline LaTeX formula
> + .Case("f[", "f]") // Displayed LaTeX formula
> + .Case("f{", "f}") // LaTeX environment
> +
> + .Default(NULL);
> +
> + if (Result) {
> + EndName = Result;
> + return true;
> + }
> +
> + for (VerbatimBlockCommandVector::const_iterator
> + I = VerbatimBlockCommands.begin(),
> + E = VerbatimBlockCommands.end();
> + I != E; ++I)
> + if (I->BeginName == BeginName) {
> + EndName = I->EndName;
> + return true;
> + }
> +
> + return false;
> +}
> +
> +bool Lexer::isVerbatimLineCommand(StringRef Name) const {
> + bool Result = llvm::StringSwitch<bool>(Name)
> + .Case("fn", true)
> + .Case("var", true)
> + .Case("property", true)
> + .Case("typedef", true)
> +
> + .Case("overload", true)
> +
> + .Case("defgroup", true)
> + .Case("ingroup", true)
> + .Case("addtogroup", true)
> + .Case("weakgroup", true)
> + .Case("name", true)
> +
> + .Case("section", true)
> + .Case("subsection", true)
> + .Case("subsubsection", true)
> + .Case("paragraph", true)
> +
> + .Case("mainpage", true)
> + .Case("subpage", true)
> + .Case("ref", true)
> +
> + .Default(false);
> +
> + if (Result)
> + return true;
> +
> + for (VerbatimLineCommandVector::const_iterator
> + I = VerbatimLineCommands.begin(),
> + E = VerbatimLineCommands.end();
> + I != E; ++I)
> + if (I->Name == Name)
> + return true;
> +
> + return false;
> +}
> +
> +void Lexer::skipLineStartingDecorations() {
> + // This function should be called only for C comments
> + assert(CommentState == LCS_InsideCComment);
> +
> + if (BufferPtr == CommentEnd)
> + return;
> +
> + switch (*BufferPtr) {
> + case ' ':
> + case '\t':
> + case '\f':
> + case '\v': {
> + const char *NewBufferPtr = BufferPtr;
> + NewBufferPtr++;
> + if (NewBufferPtr == CommentEnd)
> + return;
> +
> + char C = *NewBufferPtr;
> + while (C == ' ' || C == '\t' || C == '\f' || C == '\v') {
> + NewBufferPtr++;
> + if (NewBufferPtr == CommentEnd)
> + return;
> + C = *NewBufferPtr;
> + }
> + if (C == '*')
> + BufferPtr = NewBufferPtr + 1;
> + break;
> + }
> + case '*':
> + BufferPtr++;
> + break;
> + }
> +}
> +
> +namespace {
> +const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
> + for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
> + const char C = *BufferPtr;
> + if (C == '\n' || C == '\r')
> + return BufferPtr;
> + }
> + return BufferEnd;
> +}
> +
> +const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
> + if (BufferPtr == BufferEnd)
> + return BufferPtr;
> +
> + if (*BufferPtr == '\n')
> + BufferPtr++;
> + else {
> + assert(*BufferPtr == '\r');
> + BufferPtr++;
> + if (BufferPtr != BufferEnd && *BufferPtr == '\n')
> + BufferPtr++;
> + }
> + return BufferPtr;
> +}
> +
> +bool isHTMLIdentifierCharacter(char C) {
> + return (C >= 'a' && C <= 'z') ||
> + (C >= 'A' && C <= 'Z') ||
> + (C >= '0' && C <= '9');
> +}
> +
> +const char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) {
> + for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
> + if (!isHTMLIdentifierCharacter(*BufferPtr))
> + return BufferPtr;
> + }
> + return BufferEnd;
> +}
> +
> +/// Skip HTML string quoted in single or double quotes. Escaping quotes inside
> +/// string allowed.
> +///
> +/// Returns pointer to closing quote.
> +const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd)
> +{
> + const char Quote = *BufferPtr;
> + assert(Quote == '\"' || Quote == '\'');
> +
> + BufferPtr++;
> + for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
> + const char C = *BufferPtr;
> + if (C == Quote && BufferPtr[-1] != '\\')
> + return BufferPtr;
> + }
> + return BufferEnd;
> +}
> +
> +bool isHorizontalWhitespace(char C) {
> + return C == ' ' || C == '\t' || C == '\f' || C == '\v';
> +}
> +
> +bool isWhitespace(char C) {
> + return C == ' ' || C == '\n' || C == '\r' ||
> + C == '\t' || C == '\f' || C == '\v';
> +}
> +
> +const char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) {
> + for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
> + if (!isWhitespace(*BufferPtr))
> + return BufferPtr;
> + }
> + return BufferEnd;
> +}
> +
> +bool isCommandNameCharacter(char C) {
> + return (C >= 'a' && C <= 'z') ||
> + (C >= 'A' && C <= 'Z') ||
> + (C >= '0' && C <= '9');
> +}
> +
> +const char *skipCommandName(const char *BufferPtr, const char *BufferEnd) {
> + for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
> + if (!isCommandNameCharacter(*BufferPtr))
> + return BufferPtr;
> + }
> + return BufferEnd;
> +}
> +
> +/// Return the one past end pointer for BCPL comments.
> +/// Handles newlines escaped with backslash or trigraph for backslash.
> +const char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) {
> + const char *CurPtr = BufferPtr;
> + while (CurPtr != BufferEnd) {
> + char C = *CurPtr;
> + while (C != '\n' && C != '\r') {
> + CurPtr++;
> + if (CurPtr == BufferEnd)
> + return BufferEnd;
> + C = *CurPtr;
> + }
> + // We found a newline, check if it is escaped.
> + const char *EscapePtr = CurPtr - 1;
> + while(isHorizontalWhitespace(*EscapePtr))
> + EscapePtr--;
> +
> + if (*EscapePtr == '\\' ||
> + (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
> + EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
> + // We found an escaped newline.
> + CurPtr = skipNewline(CurPtr, BufferEnd);
> + } else
> + return CurPtr; // Not an escaped newline.
> + }
> + return BufferEnd;
> +}
> +
> +/// Return the one past end pointer for C comments.
> +/// Very dumb, does not handle escaped newlines or trigraphs.
> +const char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) {
> + for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
> + if (*BufferPtr == '*') {
> + assert(BufferPtr + 1 != BufferEnd);
> + if (*(BufferPtr + 1) == '/')
> + return BufferPtr;
> + }
> + }
> + llvm_unreachable("buffer end hit before '*/' was seen");
> +}
> +} // unnamed namespace
> +
> +void Lexer::lexCommentText(Token &T) {
> + assert(CommentState == LCS_InsideBCPLComment ||
> + CommentState == LCS_InsideCComment);
> +
> + switch (State) {
> + case LS_Normal:
> + break;
> + case LS_VerbatimBlockFirstLine:
> + lexVerbatimBlockFirstLine(T);
> + return;
> + case LS_VerbatimBlockBody:
> + lexVerbatimBlockBody(T);
> + return;
> + case LS_HTMLOpenTag:
> + lexHTMLOpenTag(T);
> + return;
> + }
> +
> + assert(State == LS_Normal);
> +
> + const char *TokenPtr = BufferPtr;
> + assert(TokenPtr < CommentEnd);
> + while (TokenPtr != CommentEnd) {
> + switch(*TokenPtr) {
> + case '\\':
> + case '@': {
> + TokenPtr++;
> + if (TokenPtr == CommentEnd) {
> + formTokenWithChars(T, TokenPtr, tok::text);
> + T.setText(StringRef(BufferPtr - T.getLength(), T.getLength()));
> + return;
> + }
> + char C = *TokenPtr;
> + switch (C) {
> + default:
> + break;
> +
> + case '\\': case '@': case '&': case '$':
> + case '#': case '<': case '>': case '%':
> + case '\"': case '.': case ':':
> + // This is one of \\ \@ \& \$ etc escape sequences.
> + TokenPtr++;
> + if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
> + // This is the \:: escape sequence.
> + TokenPtr++;
> + }
> + formTokenWithChars(T, TokenPtr, tok::text);
> + T.setText(StringRef(BufferPtr - (T.getLength() - 1),
> + T.getLength() - 1));
> + return;
> + }
> +
> + // Don't make zero-length commands.
> + if (!isCommandNameCharacter(*TokenPtr)) {
> + formTokenWithChars(T, TokenPtr, tok::text);
> + T.setText(StringRef(BufferPtr - T.getLength(), T.getLength()));
> + return;
> + }
> +
> + TokenPtr = skipCommandName(TokenPtr, CommentEnd);
> + unsigned Length = TokenPtr - (BufferPtr + 1);
> +
> + // Hardcoded support for lexing LaTeX formula commands
> + // \f$ \f[ \f] \f{ \f} as a single command.
> + if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
> + C = *TokenPtr;
> + if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') {
> + TokenPtr++;
> + Length++;
> + }
> + }
> +
> + const StringRef CommandName(BufferPtr + 1, Length);
> + StringRef EndName;
> +
> + if (isVerbatimBlockCommand(CommandName, EndName)) {
> + setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, EndName);
> + return;
> + }
> + if (isVerbatimLineCommand(CommandName)) {
> + lexVerbatimLine(T, TokenPtr);
> + return;
> + }
> + formTokenWithChars(T, TokenPtr, tok::command);
> + T.setCommandName(CommandName);
> + return;
> + }
> +
> + case '<': {
> + TokenPtr++;
> + if (TokenPtr == CommentEnd) {
> + formTokenWithChars(T, TokenPtr, tok::text);
> + T.setText(StringRef(BufferPtr - T.getLength(), T.getLength()));
> + return;
> + }
> + const char C = *TokenPtr;
> + if (isHTMLIdentifierCharacter(C))
> + setupAndLexHTMLOpenTag(T);
> + else if (C == '/')
> + lexHTMLCloseTag(T);
> + return;
> + }
> +
> + case '\n':
> + case '\r':
> + TokenPtr = skipNewline(TokenPtr, CommentEnd);
> + formTokenWithChars(T, TokenPtr, tok::newline);
> +
> + if (CommentState == LCS_InsideCComment)
> + skipLineStartingDecorations();
> + return;
> +
> + default: {
> + while (true) {
> + TokenPtr++;
> + if (TokenPtr == CommentEnd)
> + break;
> + char C = *TokenPtr;
> + if(C == '\n' || C == '\r' ||
> + C == '\\' || C == '@' || C == '<')
> + break;
> + }
> + formTokenWithChars(T, TokenPtr, tok::text);
> + T.setText(StringRef(BufferPtr - T.getLength(), T.getLength()));
> + return;
> + }
> + }
> + }
> +}
> +
> +void Lexer::setupAndLexVerbatimBlock(Token &T,
> + const char *TextBegin,
> + char Marker, StringRef EndName) {
> + VerbatimBlockEndCommandName.clear();
> + VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
> + VerbatimBlockEndCommandName.append(EndName);
> +
> + formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
> + T.setVerbatimBlockName(StringRef(TextBegin - (T.getLength() - 1),
> + T.getLength() - 1));
> +
> + State = LS_VerbatimBlockFirstLine;
> +}
> +
> +void Lexer::lexVerbatimBlockFirstLine(Token &T) {
> + assert(BufferPtr < CommentEnd);
> +
> + // FIXME: It would be better to scan the text once, finding either the block
> + // end command or newline.
> + //
> + // Extract current line.
> + const char *Newline = findNewline(BufferPtr, CommentEnd);
> + StringRef Line(BufferPtr, Newline - BufferPtr);
> +
> + // Look for end command in current line.
> + size_t Pos = Line.find(VerbatimBlockEndCommandName);
> + const char *NextLine;
> + if (Pos == StringRef::npos) {
> + // Current line is completely verbatim.
> + NextLine = skipNewline(Newline, CommentEnd);
> + } else if (Pos == 0) {
> + // Current line contains just an end command.
> + const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
> + formTokenWithChars(T, End, tok::verbatim_block_end);
> + T.setVerbatimBlockName(StringRef(End - (T.getLength() - 1),
> + T.getLength() - 1));
> + State = LS_Normal;
> + return;
> + } else {
> + // There is some text, followed by end command. Extract text first.
> + NextLine = BufferPtr + Pos;
> + }
> +
> + formTokenWithChars(T, NextLine, tok::verbatim_block_line);
> + T.setVerbatimBlockText(StringRef(NextLine - T.getLength(), T.getLength()));
> +
> + State = LS_VerbatimBlockBody;
> +}
> +
> +void Lexer::lexVerbatimBlockBody(Token &T) {
> + assert(State == LS_VerbatimBlockBody);
> +
> + if (CommentState == LCS_InsideCComment)
> + skipLineStartingDecorations();
> +
> + lexVerbatimBlockFirstLine(T);
> +}
> +
> +void Lexer::lexVerbatimLine(Token &T, const char *TextBegin) {
> + // Extract current line.
> + const char *Newline = findNewline(BufferPtr, CommentEnd);
> +
> + const StringRef Name(BufferPtr + 1, TextBegin - BufferPtr - 1);
> + const StringRef Text(TextBegin, Newline - TextBegin);
> +
> + formTokenWithChars(T, Newline, tok::verbatim_line);
> + T.setVerbatimLineName(Name);
> + T.setVerbatimLineText(Text);
> +}
> +
> +void Lexer::setupAndLexHTMLOpenTag(Token &T) {
> + assert(BufferPtr[0] == '<' && isHTMLIdentifierCharacter(BufferPtr[1]));
> + const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
> +
> + formTokenWithChars(T, TagNameEnd, tok::html_tag_open);
> + T.setHTMLTagOpenName(StringRef(TagNameEnd - (T.getLength() - 1),
> + T.getLength() - 1));
> +
> + BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
> +
> + if (BufferPtr != CommentEnd && *BufferPtr == '>') {
> + BufferPtr++;
> + return;
> + }
> +
> + if (BufferPtr != CommentEnd && isHTMLIdentifierCharacter(*BufferPtr))
> + State = LS_HTMLOpenTag;
> +}
> +
> +void Lexer::lexHTMLOpenTag(Token &T) {
> + assert(State == LS_HTMLOpenTag);
> +
> + const char *TokenPtr = BufferPtr;
> + char C = *TokenPtr;
> + if (isHTMLIdentifierCharacter(C)) {
> + TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
> + formTokenWithChars(T, TokenPtr, tok::html_ident);
> + T.setHTMLIdent(StringRef(TokenPtr - T.getLength(), T.getLength()));
> + } else {
> + switch (C) {
> + case '=':
> + TokenPtr++;
> + formTokenWithChars(T, TokenPtr, tok::html_equals);
> + break;
> + case '\"':
> + case '\'': {
> + const char *OpenQuote = TokenPtr;
> + TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
> + const char *ClosingQuote = TokenPtr;
> + if (TokenPtr != CommentEnd) // Skip closing quote.
> + TokenPtr++;
> + formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
> + T.setHTMLQuotedString(StringRef(OpenQuote + 1,
> + ClosingQuote - (OpenQuote + 1)));
> + break;
> + }
> + case '>':
> + TokenPtr++;
> + formTokenWithChars(T, TokenPtr, tok::html_greater);
> + break;
> + }
> + }
> +
> + // Now look ahead and return to normal state if we don't see any HTML tokens
> + // ahead.
> + BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
> + if (BufferPtr == CommentEnd) {
> + State = LS_Normal;
> + return;
> + }
> +
> + C = *BufferPtr;
> + if (!isHTMLIdentifierCharacter(C) &&
> + C != '=' && C != '\"' && C != '\'' && C != '>') {
> + State = LS_Normal;
> + return;
> + }
> +}
> +
> +void Lexer::lexHTMLCloseTag(Token &T) {
> + assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
> +
> + const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
> + const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
> +
> + const char *End = skipWhitespace(TagNameEnd, CommentEnd);
> + if (End != CommentEnd && *End == '>')
> + End++;
> +
> + formTokenWithChars(T, End, tok::html_tag_close);
> + T.setHTMLTagCloseName(StringRef(TagNameBegin, TagNameEnd - TagNameBegin));
> +}
> +
> +Lexer::Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts,
> + const char *BufferStart, const char *BufferEnd):
> + BufferStart(BufferStart), BufferEnd(BufferEnd),
> + FileLoc(FileLoc), CommOpts(CommOpts), BufferPtr(BufferStart),
> + CommentState(LCS_BeforeComment), State(LS_Normal) {
> +}
> +
> +void Lexer::lex(Token &T) {
> +again:
> + switch (CommentState) {
> + case LCS_BeforeComment:
> + if (BufferPtr == BufferEnd) {
> + formTokenWithChars(T, BufferPtr, tok::eof);
> + return;
> + }
> +
> + assert(*BufferPtr == '/');
> + BufferPtr++; // Skip first slash.
> + switch(*BufferPtr) {
> + case '/': { // BCPL comment.
> + BufferPtr++; // Skip second slash.
> +
> + if (BufferPtr != BufferEnd) {
> + // Skip Doxygen magic marker, if it is present.
> + // It might be missing because of a typo //< or /*<, or because we
> + // merged this non-Doxygen comment into a bunch of Doxygen comments
> + // around it: /** ... */ /* ... */ /** ... */
> + const char C = *BufferPtr;
> + if (C == '/' || C == '!')
> + BufferPtr++;
> + }
> +
> + // Skip less-than symbol that marks trailing comments.
> + // Skip it even if the comment is not a Doxygen one, because //< and /*<
> + // are frequent typos.
> + if (BufferPtr != BufferEnd && *BufferPtr == '<')
> + BufferPtr++;
> +
> + CommentState = LCS_InsideBCPLComment;
> + State = LS_Normal;
> + CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
> + goto again;
> + }
> + case '*': { // C comment.
> + BufferPtr++; // Skip star.
> +
> + // Skip Doxygen magic marker.
> + const char C = *BufferPtr;
> + if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!')
> + BufferPtr++;
> +
> + // Skip less-than symbol that marks trailing comments.
> + if (BufferPtr != BufferEnd && *BufferPtr == '<')
> + BufferPtr++;
> +
> + CommentState = LCS_InsideCComment;
> + State = LS_Normal;
> + CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
> + goto again;
> + }
> + default:
> + llvm_unreachable("second character of comment should be '/' or '*'");
> + }
> +
> + case LCS_BetweenComments: {
> + // Consecutive comments are extracted only if there is only whitespace
> + // between them. So we can search for the start of the next comment.
> + const char *EndWhitespace = BufferPtr;
> + while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
> + EndWhitespace++;
> +
> + // Turn any whitespace between comments (and there is only whitespace
> + // between them) into a newline. We have two newlines between comments
> + // in total (first one was synthesized after a comment).
> + formTokenWithChars(T, EndWhitespace, tok::newline);
> +
> + CommentState = LCS_BeforeComment;
> + break;
> + }
> +
> + case LCS_InsideBCPLComment:
> + case LCS_InsideCComment:
> + if (BufferPtr != CommentEnd) {
> + lexCommentText(T);
> + break;
> + } else {
> + // Skip C comment closing sequence.
> + if (CommentState == LCS_InsideCComment) {
> + assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
> + BufferPtr += 2;
> + assert(BufferPtr <= BufferEnd);
> +
> + // Synthesize a newline just after the C comment, regardless of whether
> + // there is actually a newline.
> + formTokenWithChars(T, BufferPtr, tok::newline);
> +
> + CommentState = LCS_BetweenComments;
> + break;
> + } else {
> + // Don't synthesize a newline after a BCPL comment.
> + CommentState = LCS_BetweenComments;
> + goto again;
> + }
> + }
> + }
> +}
> +
> +StringRef Lexer::getSpelling(const Token &Tok,
> + const SourceManager &SourceMgr,
> + bool *Invalid) const {
> + SourceLocation Loc = Tok.getLocation();
> + std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
> +
> + bool InvalidTemp = false;
> + StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
> + if (InvalidTemp) {
> + *Invalid = true;
> + return StringRef();
> + }
> +
> + const char *Begin = File.data() + LocInfo.second;
> + return StringRef(Begin, Tok.getLength());
> +}
> +
> +void Lexer::addVerbatimBlockCommand(StringRef BeginName, StringRef EndName) {
> + VerbatimBlockCommand VBC;
> + VBC.BeginName = BeginName;
> + VBC.EndName = EndName;
> + VerbatimBlockCommands.push_back(VBC);
> +}
> +
> +void Lexer::addVerbatimLineCommand(StringRef Name) {
> + VerbatimLineCommand VLC;
> + VLC.Name = Name;
> + VerbatimLineCommands.push_back(VLC);
> +}
> +
> +} // end namespace comments
> +} // end namespace clang
> +
>
> Modified: cfe/trunk/lib/AST/RawCommentList.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/RawCommentList.cpp?rev=159223&r1=159222&r2=159223&view=diff
> ==============================================================================
> --- cfe/trunk/lib/AST/RawCommentList.cpp (original)
> +++ cfe/trunk/lib/AST/RawCommentList.cpp Tue Jun 26 15:39:18 2012
> @@ -8,6 +8,9 @@
> //===----------------------------------------------------------------------===//
>
> #include "clang/AST/RawCommentList.h"
> +#include "clang/AST/ASTContext.h"
> +#include "clang/AST/CommentLexer.h"
> +#include "clang/AST/CommentBriefParser.h"
> #include "llvm/ADT/STLExtras.h"
>
> using namespace clang;
> @@ -126,6 +129,24 @@
> return StringRef(BufferStart + BeginOffset, Length);
> }
>
> +StringRef RawComment::extractBriefText(const ASTContext &Context) const {
> + // Make sure that RawText is valid.
> + getRawText(Context.getSourceManager());
> +
> + comments::Lexer L(Range.getBegin(), comments::CommentOptions(),
> + RawText.begin(), RawText.end());
> + comments::BriefParser P(L);
> +
> + const std::string Result = P.Parse();
> + const unsigned BriefTextLength = Result.size();
> + char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
> + memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
> + BriefText = StringRef(BriefTextPtr, BriefTextLength);
> + BriefTextValid = true;
> +
> + return BriefText;
> +}
> +
> namespace {
> bool containsOnlyWhitespace(StringRef Str) {
> return Str.find_first_not_of(" \t\f\v\r\n") == StringRef::npos;
>
> Modified: cfe/trunk/test/Index/annotate-comments.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Index/annotate-comments.cpp?rev=159223&r1=159222&r2=159223&view=diff
> ==============================================================================
> --- cfe/trunk/test/Index/annotate-comments.cpp (original)
> +++ cfe/trunk/test/Index/annotate-comments.cpp Tue Jun 26 15:39:18 2012
> @@ -163,6 +163,37 @@
> int isdoxy42; /* NOT_DOXYGEN */ ///< isdoxy42 IS_DOXYGEN_SINGLE
> };
>
> +/// IS_DOXYGEN_START
> +/// It is fine to have a command at the end of comment.
> +///\brief
> +///
> +/// Some malformed command.
> +/* \*/
> +/**
> + * \brief Aaa aaaaaaa aaaa.
> + * IS_DOXYGEN_END
> + */
> +void isdoxy43(void);
> +
> +/// IS_DOXYGEN_START Aaa bbb
> +/// ccc.
> +///
> +/// Ddd eee.
> +/// Fff.
> +///
> +/// Ggg. IS_DOXYGEN_END
> +void isdoxy44(void);
> +
> +/// IS_DOXYGEN_START Aaa bbb
> +/// ccc.
> +///
> +/// \brief
> +/// Ddd eee.
> +/// Fff.
> +///
> +/// Ggg. IS_DOXYGEN_END
> +void isdoxy45(void);
> +
> #endif
>
> // RUN: rm -rf %t
> @@ -187,8 +218,8 @@
> // WRONG-NOT: IS_DOXYGEN_NOT_ATTACHED
>
> // Ensure we don't pick up extra comments.
> -// WRONG-NOT: IS_DOXYGEN_START{{.*}}IS_DOXYGEN_START
> -// WRONG-NOT: IS_DOXYGEN_END{{.*}}IS_DOXYGEN_END
> +// WRONG-NOT: IS_DOXYGEN_START{{.*}}IS_DOXYGEN_START{{.*}}BriefComment=
> +// WRONG-NOT: IS_DOXYGEN_END{{.*}}IS_DOXYGEN_END{{.*}}BriefComment=
>
> // RUN: FileCheck %s < %t/out.c-index-direct
> // RUN: FileCheck %s < %t/out.c-index-pch
> @@ -226,4 +257,8 @@
> // CHECK: annotate-comments.cpp:155:6: FunctionDecl=isdoxy40:{{.*}} isdoxy40 IS_DOXYGEN_SINGLE
> // CHECK: annotate-comments.cpp:160:5: FunctionDecl=isdoxy41:{{.*}} isdoxy41 IS_DOXYGEN_SINGLE
> // CHECK: annotate-comments.cpp:163:7: FieldDecl=isdoxy42:{{.*}} isdoxy42 IS_DOXYGEN_SINGLE
> +// CHECK: annotate-comments.cpp:176:6: FunctionDecl=isdoxy43:{{.*}} IS_DOXYGEN_START{{.*}} IS_DOXYGEN_END
> +
> +// CHECK: annotate-comments.cpp:185:6: FunctionDecl=isdoxy44:{{.*}} BriefComment=[ IS_DOXYGEN_START Aaa bbb\n ccc.\n]
> +// CHECK: annotate-comments.cpp:195:6: FunctionDecl=isdoxy45:{{.*}} BriefComment=[\n Ddd eee.\n Fff.\n]
>
>
> Modified: cfe/trunk/tools/c-index-test/c-index-test.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/c-index-test/c-index-test.c?rev=159223&r1=159222&r2=159223&view=diff
> ==============================================================================
> --- cfe/trunk/tools/c-index-test/c-index-test.c (original)
> +++ cfe/trunk/tools/c-index-test/c-index-test.c Tue Jun 26 15:39:18 2012
> @@ -162,6 +162,24 @@
> /* Pretty-printing. */
> /******************************************************************************/
>
> +static void PrintCString(const char *Prefix, const char *CStr) {
> + printf(" %s=[", Prefix);
> + if (CStr != NULL && CStr[0] != '\0') {
> + for ( ; *CStr; ++CStr) {
> + const char C = *CStr;
> + switch (C) {
> + case '\n': printf("\\n"); break;
> + case '\r': printf("\\r"); break;
> + case '\t': printf("\\t"); break;
> + case '\v': printf("\\v"); break;
> + case '\f': printf("\\f"); break;
> + default: putchar(C); break;
> + }
> + }
> + }
> + printf("]");
> +}
> +
> static void PrintRange(CXSourceRange R, const char *str) {
> CXFile begin_file, end_file;
> unsigned begin_line, begin_column, end_line, end_column;
> @@ -218,8 +236,10 @@
> CXPlatformAvailability PlatformAvailability[2];
> int NumPlatformAvailability;
> int I;
> - CXString Comment;
> - const char *CommentCString;
> + CXString RawComment;
> + const char *RawCommentCString;
> + CXString BriefComment;
> + const char *BriefCommentCString;
>
> ks = clang_getCursorKindSpelling(Cursor.kind);
> string = want_display_name? clang_getCursorDisplayName(Cursor)
> @@ -401,21 +421,19 @@
> PrintRange(RefNameRange, "RefName");
> }
>
> - Comment = clang_Cursor_getRawCommentText(Cursor);
> - CommentCString = clang_getCString(Comment);
> - if (CommentCString != NULL && CommentCString[0] != '\0') {
> - printf(" Comment=[");
> - for ( ; *CommentCString; ++CommentCString) {
> - if (*CommentCString != '\n')
> - putchar(*CommentCString);
> - else
> - printf("\\n");
> - }
> - printf("]");
> -
> - PrintRange(clang_Cursor_getCommentRange(Cursor), "CommentRange");
> + RawComment = clang_Cursor_getRawCommentText(Cursor);
> + RawCommentCString = clang_getCString(RawComment);
> + if (RawCommentCString != NULL && RawCommentCString[0] != '\0') {
> + PrintCString("RawComment", RawCommentCString);
> + PrintRange(clang_Cursor_getCommentRange(Cursor), "RawCommentRange");
> +
> + BriefComment = clang_Cursor_getBriefCommentText(Cursor);
> + BriefCommentCString = clang_getCString(BriefComment);
> + if (BriefCommentCString != NULL && BriefCommentCString[0] != '\0')
> + PrintCString("BriefComment", BriefCommentCString);
> + clang_disposeString(BriefComment);
> }
> - clang_disposeString(Comment);
> + clang_disposeString(RawComment);
> }
> }
>
>
> Modified: cfe/trunk/tools/libclang/CIndex.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/libclang/CIndex.cpp?rev=159223&r1=159222&r2=159223&view=diff
> ==============================================================================
> --- cfe/trunk/tools/libclang/CIndex.cpp (original)
> +++ cfe/trunk/tools/libclang/CIndex.cpp Tue Jun 26 15:39:18 2012
> @@ -5707,6 +5707,24 @@
>
> } // end: extern "C"
>
> +CXString clang_Cursor_getBriefCommentText(CXCursor C) {
> + if (!clang_isDeclaration(C.kind))
> + return createCXString((const char *) NULL);
> +
> + const Decl *D = getCursorDecl(C);
> + const ASTContext &Context = getCursorContext(C);
> + const RawComment *RC = Context.getRawCommentForDecl(D);
> +
> + if (RC && RC->isDocumentation()) {
> + StringRef BriefText = RC->getBriefText(Context);
> +
> + // Don't duplicate the string because RawComment ensures that this memory
> + // will not go away.
> + return createCXString(BriefText, false);
> + }
> +
> + return createCXString((const char *) NULL);
> +}
>
> //===----------------------------------------------------------------------===//
> // C++ AST instrospection.
>
> Modified: cfe/trunk/tools/libclang/libclang.exports
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/libclang/libclang.exports?rev=159223&r1=159222&r2=159223&view=diff
> ==============================================================================
> --- cfe/trunk/tools/libclang/libclang.exports (original)
> +++ cfe/trunk/tools/libclang/libclang.exports Tue Jun 26 15:39:18 2012
> @@ -5,6 +5,7 @@
> clang_CXXMethod_isStatic
> clang_CXXMethod_isVirtual
> clang_Cursor_getArgument
> +clang_Cursor_getBriefCommentText
> clang_Cursor_getCommentRange
> clang_Cursor_getRawCommentText
> clang_Cursor_getNumArguments
>
> Added: cfe/trunk/unittests/AST/CMakeLists.txt
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/AST/CMakeLists.txt?rev=159223&view=auto
> ==============================================================================
> --- cfe/trunk/unittests/AST/CMakeLists.txt (added)
> +++ cfe/trunk/unittests/AST/CMakeLists.txt Tue Jun 26 15:39:18 2012
> @@ -0,0 +1,7 @@
> +add_clang_unittest(ASTTests
> + CommentLexer.cpp
> + )
> +
> +target_link_libraries(ASTTests
> + clangAST
> + )
>
> Added: cfe/trunk/unittests/AST/CommentLexer.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/AST/CommentLexer.cpp?rev=159223&view=auto
> ==============================================================================
> --- cfe/trunk/unittests/AST/CommentLexer.cpp (added)
> +++ cfe/trunk/unittests/AST/CommentLexer.cpp Tue Jun 26 15:39:18 2012
> @@ -0,0 +1,1010 @@
> +//===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===//
> +//
> +// The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "clang/Basic/SourceManager.h"
> +#include "clang/Basic/FileManager.h"
> +#include "clang/Basic/Diagnostic.h"
> +#include "clang/AST/CommentLexer.h"
> +#include "llvm/ADT/STLExtras.h"
> +#include <vector>
> +
> +#include "gtest/gtest.h"
> +
> +using namespace llvm;
> +using namespace clang;
> +
> +namespace clang {
> +namespace comments {
> +
> +namespace {
> +class CommentLexerTest : public ::testing::Test {
> +protected:
> + CommentLexerTest()
> + : FileMgr(FileMgrOpts),
> + DiagID(new DiagnosticIDs()),
> + Diags(DiagID, new IgnoringDiagConsumer()),
> + SourceMgr(Diags, FileMgr) {
> + }
> +
> + FileSystemOptions FileMgrOpts;
> + FileManager FileMgr;
> + IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
> + DiagnosticsEngine Diags;
> + SourceManager SourceMgr;
> +
> + void lexString(const char *Source, std::vector<Token> &Toks);
> +};
> +
> +void CommentLexerTest::lexString(const char *Source,
> + std::vector<Token> &Toks) {
> + MemoryBuffer *Buf = MemoryBuffer::getMemBuffer(Source);
> + FileID File = SourceMgr.createFileIDForMemBuffer(Buf);
> + SourceLocation Begin = SourceMgr.getLocForStartOfFile(File);
> +
> + comments::Lexer L(Begin, CommentOptions(),
> + Source, Source + strlen(Source));
> +
> + while (1) {
> + Token Tok;
> + L.lex(Tok);
> + if (Tok.is(tok::eof))
> + break;
> + Toks.push_back(Tok);
> + }
> +}
> +
> +} // unnamed namespace
> +
> +// Empty source range should be handled.
> +TEST_F(CommentLexerTest, Basic1) {
> + const char *Source = "";
> + std::vector<Token> Toks;
> +
> + lexString(Source, Toks);
> +
> + ASSERT_EQ(0U, Toks.size());
> +}
> +
> +// Empty comments should be handled.
> +TEST_F(CommentLexerTest, Basic2) {
> + const char *Sources[] = {
> + "//", "///", "//!", "///<", "//!<"
> + };
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(1U, Toks.size());
> +
> + ASSERT_EQ(tok::newline, Toks[0].getKind());
> + }
> +}
> +
> +// Empty comments should be handled.
> +TEST_F(CommentLexerTest, Basic3) {
> + const char *Sources[] = {
> + "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/"
> + };
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(2U, Toks.size());
> +
> + ASSERT_EQ(tok::newline, Toks[0].getKind());
> + ASSERT_EQ(tok::newline, Toks[1].getKind());
> + }
> +}
> +
> +// Single comment with plain text.
> +TEST_F(CommentLexerTest, Basic4) {
> + const char *Sources[] = {
> + "// Meow", "/// Meow", "//! Meow",
> + "// Meow\n", "// Meow\r\n", "//! Meow\r",
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(2U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" Meow"), Toks[0].getText());
> +
> + ASSERT_EQ(tok::newline, Toks[1].getKind());
> + }
> +}
> +
> +// Single comment with plain text.
> +TEST_F(CommentLexerTest, Basic5) {
> + const char *Sources[] = {
> + "/* Meow*/", "/** Meow*/", "/*! Meow*/"
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(3U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" Meow"), Toks[0].getText());
> +
> + ASSERT_EQ(tok::newline, Toks[1].getKind());
> + ASSERT_EQ(tok::newline, Toks[2].getKind());
> + }
> +}
> +
> +// Test newline escaping.
> +TEST_F(CommentLexerTest, Basic6) {
> + const char *Sources[] = {
> + "// Aaa\\\n" " Bbb\\ \n" " Ccc?" "?/\n",
> + "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n",
> + "// Aaa\\\r" " Bbb\\ \r" " Ccc?" "?/\r"
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(10U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
> + ASSERT_EQ(tok::text, Toks[1].getKind());
> + ASSERT_EQ(StringRef("\\"), Toks[1].getText());
> + ASSERT_EQ(tok::newline, Toks[2].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[3].getKind());
> + ASSERT_EQ(StringRef(" Bbb"), Toks[3].getText());
> + ASSERT_EQ(tok::text, Toks[4].getKind());
> + ASSERT_EQ(StringRef("\\"), Toks[4].getText());
> + ASSERT_EQ(tok::text, Toks[5].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[5].getText());
> + ASSERT_EQ(tok::newline, Toks[6].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[7].getKind());
> + ASSERT_EQ(StringRef(" Ccc?" "?/"), Toks[7].getText());
> + ASSERT_EQ(tok::newline, Toks[8].getKind());
> +
> + ASSERT_EQ(tok::newline, Toks[9].getKind());
> + }
> +}
> +
> +// Check that we skip C-style aligned stars correctly.
> +TEST_F(CommentLexerTest, Basic7) {
> + const char *Source =
> + "/* Aaa\n"
> + " * Bbb\r\n"
> + "\t* Ccc\n"
> + " ! Ddd\n"
> + " * Eee\n"
> + " ** Fff\n"
> + " */";
> + std::vector<Token> Toks;
> +
> + lexString(Source, Toks);
> +
> + ASSERT_EQ(15U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
> + ASSERT_EQ(tok::newline, Toks[1].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[2].getKind());
> + ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText());
> + ASSERT_EQ(tok::newline, Toks[3].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[4].getKind());
> + ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText());
> + ASSERT_EQ(tok::newline, Toks[5].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[6].getKind());
> + ASSERT_EQ(StringRef(" ! Ddd"), Toks[6].getText());
> + ASSERT_EQ(tok::newline, Toks[7].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[8].getKind());
> + ASSERT_EQ(StringRef(" Eee"), Toks[8].getText());
> + ASSERT_EQ(tok::newline, Toks[9].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[10].getKind());
> + ASSERT_EQ(StringRef("* Fff"), Toks[10].getText());
> + ASSERT_EQ(tok::newline, Toks[11].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[12].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[12].getText());
> +
> + ASSERT_EQ(tok::newline, Toks[13].getKind());
> + ASSERT_EQ(tok::newline, Toks[14].getKind());
> +}
> +
> +// A command marker followed by comment end.
> +TEST_F(CommentLexerTest, DoxygenCommand1) {
> + const char *Sources[] = { "//@", "///@", "//!@" };
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(2U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef("@"), Toks[0].getText());
> +
> + ASSERT_EQ(tok::newline, Toks[1].getKind());
> + }
> +}
> +
> +// A command marker followed by comment end.
> +TEST_F(CommentLexerTest, DoxygenCommand2) {
> + const char *Sources[] = { "/*@*/", "/**@*/", "/*!@*/"};
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(3U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef("@"), Toks[0].getText());
> +
> + ASSERT_EQ(tok::newline, Toks[1].getKind());
> + ASSERT_EQ(tok::newline, Toks[2].getKind());
> + }
> +}
> +
> +// A command marker followed by comment end.
> +TEST_F(CommentLexerTest, DoxygenCommand3) {
> + const char *Sources[] = { "/*\\*/", "/**\\*/" };
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(3U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef("\\"), Toks[0].getText());
> +
> + ASSERT_EQ(tok::newline, Toks[1].getKind());
> + ASSERT_EQ(tok::newline, Toks[2].getKind());
> + }
> +}
> +
> +// Doxygen escape sequences.
> +TEST_F(CommentLexerTest, DoxygenCommand4) {
> + const char *Source =
> + "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::";
> + const char *Text[] = {
> + " ",
> + "\\", " ", "@", " ", "&", " ", "$", " ", "#", " ",
> + "<", " ", ">", " ", "%", " ", "\"", " ", ".", " ",
> + "::", ""
> + };
> +
> + std::vector<Token> Toks;
> +
> + lexString(Source, Toks);
> +
> + ASSERT_EQ(array_lengthof(Text), Toks.size());
> +
> + for (size_t i = 0, e = Toks.size(); i != e; i++) {
> + if(Toks[i].is(tok::text))
> + ASSERT_EQ(StringRef(Text[i]), Toks[i].getText())
> + << "index " << i;
> + }
> +}
> +
> +TEST_F(CommentLexerTest, DoxygenCommand5) {
> + const char *Source = "/// \\brief Aaa.";
> + std::vector<Token> Toks;
> +
> + lexString(Source, Toks);
> +
> + ASSERT_EQ(4U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::command, Toks[1].getKind());
> + ASSERT_EQ(StringRef("brief"), Toks[1].getCommandName());
> +
> + ASSERT_EQ(tok::text, Toks[2].getKind());
> + ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText());
> +
> + ASSERT_EQ(tok::newline, Toks[3].getKind());
> +}
> +
> +TEST_F(CommentLexerTest, DoxygenCommand6) {
> + const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n";
> + std::vector<Token> Toks;
> +
> + lexString(Source, Toks);
> +
> + ASSERT_EQ(8U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::command, Toks[1].getKind());
> + ASSERT_EQ(StringRef("aaa"), Toks[1].getCommandName());
> +
> + ASSERT_EQ(tok::command, Toks[2].getKind());
> + ASSERT_EQ(StringRef("bbb"), Toks[2].getCommandName());
> +
> + ASSERT_EQ(tok::text, Toks[3].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[3].getText());
> +
> + ASSERT_EQ(tok::command, Toks[4].getKind());
> + ASSERT_EQ(StringRef("ccc"), Toks[4].getCommandName());
> +
> + ASSERT_EQ(tok::text, Toks[5].getKind());
> + ASSERT_EQ(StringRef("\t"), Toks[5].getText());
> +
> + ASSERT_EQ(tok::command, Toks[6].getKind());
> + ASSERT_EQ(StringRef("ddd"), Toks[6].getCommandName());
> +
> + ASSERT_EQ(tok::newline, Toks[7].getKind());
> +}
> +
> +// Empty verbatim block.
> +TEST_F(CommentLexerTest, VerbatimBlock1) {
> + const char *Sources[] = {
> + "/// \\verbatim\\endverbatim\n//",
> + "/** \\verbatim\\endverbatim*/"
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(5U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
> + ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::verbatim_block_end, Toks[2].getKind());
> + ASSERT_EQ(StringRef("endverbatim"), Toks[2].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::newline, Toks[3].getKind());
> + ASSERT_EQ(tok::newline, Toks[4].getKind());
> + }
> +}
> +
> +// Empty verbatim block without an end command.
> +TEST_F(CommentLexerTest, VerbatimBlock2) {
> + const char *Sources[] = {
> + "/// \\verbatim\n//",
> + "/** \\verbatim*/"
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(4U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
> + ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::newline, Toks[2].getKind());
> + ASSERT_EQ(tok::newline, Toks[3].getKind());
> + }
> +}
> +
> +// Single-line verbatim block.
> +TEST_F(CommentLexerTest, VerbatimBlock3) {
> + const char *Sources[] = {
> + "/// Meow \\verbatim aaa \\endverbatim\n//",
> + "/** Meow \\verbatim aaa \\endverbatim*/"
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(6U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" Meow "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
> + ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
> + ASSERT_EQ(StringRef(" aaa "), Toks[2].getVerbatimBlockText());
> +
> + ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind());
> + ASSERT_EQ(StringRef("endverbatim"), Toks[3].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::newline, Toks[4].getKind());
> + ASSERT_EQ(tok::newline, Toks[5].getKind());
> + }
> +}
> +
> +// Single-line verbatim block without an end command.
> +TEST_F(CommentLexerTest, VerbatimBlock4) {
> + const char *Sources[] = {
> + "/// Meow \\verbatim aaa \n//",
> + "/** Meow \\verbatim aaa */"
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(5U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" Meow "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
> + ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
> + ASSERT_EQ(StringRef(" aaa "), Toks[2].getVerbatimBlockText());
> +
> + ASSERT_EQ(tok::newline, Toks[3].getKind());
> + ASSERT_EQ(tok::newline, Toks[4].getKind());
> + }
> +}
> +
> +// Complex test for verbatim blocks.
> +TEST_F(CommentLexerTest, VerbatimBlock5) {
> + const char *Source =
> + "/* Meow \\verbatim aaa\\$\\@\n"
> + "bbb \\endverbati\r"
> + "ccc\r\n"
> + "ddd \\endverbatim Blah \\verbatim eee\n"
> + "\\endverbatim BlahBlah*/";
> + std::vector<Token> Toks;
> +
> + lexString(Source, Toks);
> +
> + ASSERT_EQ(14U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" Meow "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
> + ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
> + ASSERT_EQ(StringRef(" aaa\\$\\@\n"), Toks[2].getVerbatimBlockText());
> +
> + ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind());
> + ASSERT_EQ(StringRef("bbb \\endverbati\r"), Toks[3].getVerbatimBlockText());
> +
> + ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind());
> + ASSERT_EQ(StringRef("ccc\r\n"), Toks[4].getVerbatimBlockText());
> +
> + ASSERT_EQ(tok::verbatim_block_line, Toks[5].getKind());
> + ASSERT_EQ(StringRef("ddd "), Toks[5].getVerbatimBlockText());
> +
> + ASSERT_EQ(tok::verbatim_block_end, Toks[6].getKind());
> + ASSERT_EQ(StringRef("endverbatim"), Toks[6].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::text, Toks[7].getKind());
> + ASSERT_EQ(StringRef(" Blah "), Toks[7].getText());
> +
> + ASSERT_EQ(tok::verbatim_block_begin, Toks[8].getKind());
> + ASSERT_EQ(StringRef("verbatim"), Toks[8].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::verbatim_block_line, Toks[9].getKind());
> + ASSERT_EQ(StringRef(" eee\n"), Toks[9].getVerbatimBlockText());
> +
> + ASSERT_EQ(tok::verbatim_block_end, Toks[10].getKind());
> + ASSERT_EQ(StringRef("endverbatim"), Toks[10].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::text, Toks[11].getKind());
> + ASSERT_EQ(StringRef(" BlahBlah"), Toks[11].getText());
> +
> + ASSERT_EQ(tok::newline, Toks[12].getKind());
> + ASSERT_EQ(tok::newline, Toks[13].getKind());
> +}
> +
> +// LaTeX verbatim blocks.
> +TEST_F(CommentLexerTest, VerbatimBlock6) {
> + const char *Source =
> + "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f}";
> + std::vector<Token> Toks;
> +
> + lexString(Source, Toks);
> +
> + ASSERT_EQ(13U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
> + ASSERT_EQ(StringRef("f$"), Toks[1].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
> + ASSERT_EQ(StringRef(" Aaa "), Toks[2].getVerbatimBlockText());
> +
> + ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind());
> + ASSERT_EQ(StringRef("f$"), Toks[3].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::text, Toks[4].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[4].getText());
> +
> + ASSERT_EQ(tok::verbatim_block_begin, Toks[5].getKind());
> + ASSERT_EQ(StringRef("f["), Toks[5].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind());
> + ASSERT_EQ(StringRef(" Bbb "), Toks[6].getVerbatimBlockText());
> +
> + ASSERT_EQ(tok::verbatim_block_end, Toks[7].getKind());
> + ASSERT_EQ(StringRef("f]"), Toks[7].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::text, Toks[8].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[8].getText());
> +
> + ASSERT_EQ(tok::verbatim_block_begin, Toks[9].getKind());
> + ASSERT_EQ(StringRef("f{"), Toks[9].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::verbatim_block_line, Toks[10].getKind());
> + ASSERT_EQ(StringRef(" Ccc "), Toks[10].getVerbatimBlockText());
> +
> + ASSERT_EQ(tok::verbatim_block_end, Toks[11].getKind());
> + ASSERT_EQ(StringRef("f}"), Toks[11].getVerbatimBlockName());
> +
> + ASSERT_EQ(tok::newline, Toks[12].getKind());
> +}
> +
> +// Empty verbatim line.
> +TEST_F(CommentLexerTest, VerbatimLine1) {
> + const char *Sources[] = {
> + "/// \\fn\n//",
> + "/** \\fn*/"
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(4U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::verbatim_line, Toks[1].getKind());
> + ASSERT_EQ(StringRef("fn"), Toks[1].getVerbatimLineName());
> + ASSERT_EQ(StringRef(""), Toks[1].getVerbatimLineText());
> +
> + ASSERT_EQ(tok::newline, Toks[2].getKind());
> + ASSERT_EQ(tok::newline, Toks[3].getKind());
> + }
> +}
> +
> +// Verbatim line with Doxygen escape sequences, which should not be expanded.
> +TEST_F(CommentLexerTest, VerbatimLine2) {
> + const char *Sources[] = {
> + "/// \\fn void *foo(const char *zzz = \"\\$\");\n//",
> + "/** \\fn void *foo(const char *zzz = \"\\$\");*/"
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(4U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::verbatim_line, Toks[1].getKind());
> + ASSERT_EQ(StringRef("fn"), Toks[1].getVerbatimLineName());
> + ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
> + Toks[1].getVerbatimLineText());
> +
> + ASSERT_EQ(tok::newline, Toks[2].getKind());
> + ASSERT_EQ(tok::newline, Toks[3].getKind());
> + }
> +}
> +
> +// A verbatim line should not eat anything from the next source line.
> +TEST_F(CommentLexerTest, VerbatimLine3) {
> + const char *Source =
> + "/** \\fn void *foo(const char *zzz = \"\\$\");\n"
> + " * Meow\n"
> + " */";
> +
> + std::vector<Token> Toks;
> +
> + lexString(Source, Toks);
> +
> + ASSERT_EQ(8U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::verbatim_line, Toks[1].getKind());
> + ASSERT_EQ(StringRef("fn"), Toks[1].getVerbatimLineName());
> + ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
> + Toks[1].getVerbatimLineText());
> + ASSERT_EQ(tok::newline, Toks[2].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[3].getKind());
> + ASSERT_EQ(StringRef(" Meow"), Toks[3].getText());
> + ASSERT_EQ(tok::newline, Toks[4].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[5].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[5].getText());
> +
> + ASSERT_EQ(tok::newline, Toks[6].getKind());
> + ASSERT_EQ(tok::newline, Toks[7].getKind());
> +}
> +
> +TEST_F(CommentLexerTest, HTML1) {
> + const char *Source =
> + "// <";
> +
> + std::vector<Token> Toks;
> +
> + lexString(Source, Toks);
> +
> + ASSERT_EQ(3U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::text, Toks[1].getKind());
> + ASSERT_EQ(StringRef("<"), Toks[1].getText());
> +
> + ASSERT_EQ(tok::newline, Toks[2].getKind());
> +}
> +
> +TEST_F(CommentLexerTest, HTML2) {
> + const char *Sources[] = {
> + "// <tag",
> + "// <tag "
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(3U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
> + ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
> +
> + ASSERT_EQ(tok::newline, Toks[2].getKind());
> + }
> +}
> +
> +TEST_F(CommentLexerTest, HTML3) {
> + const char *Source = "// <tag=";
> +
> + std::vector<Token> Toks;
> +
> + lexString(Source, Toks);
> +
> + ASSERT_EQ(4U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
> + ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
> +
> + ASSERT_EQ(tok::text, Toks[2].getKind());
> + ASSERT_EQ(StringRef("="), Toks[2].getText());
> +
> + ASSERT_EQ(tok::newline, Toks[3].getKind());
> +}
> +
> +TEST_F(CommentLexerTest, HTML4) {
> + const char *Sources[] = {
> + "// <tag attr",
> + "// <tag attr "
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(4U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
> + ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
> +
> + ASSERT_EQ(tok::html_ident, Toks[2].getKind());
> + ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
> +
> + ASSERT_EQ(tok::newline, Toks[3].getKind());
> + }
> +}
> +
> +TEST_F(CommentLexerTest, HTML5) {
> + const char *Sources[] = {
> + "// <tag attr=",
> + "// <tag attr ="
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(5U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
> + ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
> +
> + ASSERT_EQ(tok::html_ident, Toks[2].getKind());
> + ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
> +
> + ASSERT_EQ(tok::html_equals, Toks[3].getKind());
> +
> + ASSERT_EQ(tok::newline, Toks[4].getKind());
> + }
> +}
> +
> +TEST_F(CommentLexerTest, HTML6) {
> + const char *Sources[] = {
> + "// <tag attr=\"",
> + "// <tag attr = \"",
> + "// <tag attr=\'",
> + "// <tag attr = \'"
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(6U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
> + ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
> +
> + ASSERT_EQ(tok::html_ident, Toks[2].getKind());
> + ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
> +
> + ASSERT_EQ(tok::html_equals, Toks[3].getKind());
> +
> + ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
> + ASSERT_EQ(StringRef(""), Toks[4].getHTMLQuotedString());
> +
> + ASSERT_EQ(tok::newline, Toks[5].getKind());
> + }
> +}
> +
> +TEST_F(CommentLexerTest, HTML7) {
> + const char *Source = "// <tag attr=@";
> +
> + std::vector<Token> Toks;
> +
> + lexString(Source, Toks);
> +
> + ASSERT_EQ(6U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
> + ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
> +
> + ASSERT_EQ(tok::html_ident, Toks[2].getKind());
> + ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
> +
> + ASSERT_EQ(tok::html_equals, Toks[3].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[4].getKind());
> + ASSERT_EQ(StringRef("@"), Toks[4].getText());
> +
> + ASSERT_EQ(tok::newline, Toks[5].getKind());
> +}
> +
> +TEST_F(CommentLexerTest, HTML8) {
> + const char *Sources[] = {
> + "// <tag attr=\"val\\\"\\'val",
> + "// <tag attr=\"val\\\"\\'val\"",
> + "// <tag attr=\'val\\\"\\'val",
> + "// <tag attr=\'val\\\"\\'val\'"
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(6U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
> + ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
> +
> + ASSERT_EQ(tok::html_ident, Toks[2].getKind());
> + ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
> +
> + ASSERT_EQ(tok::html_equals, Toks[3].getKind());
> +
> + ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
> + ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
> +
> + ASSERT_EQ(tok::newline, Toks[5].getKind());
> + }
> +}
> +
> +TEST_F(CommentLexerTest, HTML9) {
> + const char *Sources[] = {
> + "// <tag attr=\"val\\\"\\'val\">",
> + "// <tag attr=\'val\\\"\\'val\'>"
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(7U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
> + ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
> +
> + ASSERT_EQ(tok::html_ident, Toks[2].getKind());
> + ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
> +
> + ASSERT_EQ(tok::html_equals, Toks[3].getKind());
> +
> + ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
> + ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
> +
> + ASSERT_EQ(tok::html_greater, Toks[5].getKind());
> +
> + ASSERT_EQ(tok::newline, Toks[6].getKind());
> + }
> +}
> +
> +TEST_F(CommentLexerTest, HTML10) {
> + const char *Source = "// </";
> +
> + std::vector<Token> Toks;
> +
> + lexString(Source, Toks);
> +
> + ASSERT_EQ(3U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::html_tag_close, Toks[1].getKind());
> + ASSERT_EQ(StringRef(""), Toks[1].getHTMLTagCloseName());
> +
> + ASSERT_EQ(tok::newline, Toks[2].getKind());
> +}
> +
> +
> +TEST_F(CommentLexerTest, HTML11) {
> + const char *Source = "// </@";
> +
> + std::vector<Token> Toks;
> +
> + lexString(Source, Toks);
> +
> + ASSERT_EQ(4U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::html_tag_close, Toks[1].getKind());
> + ASSERT_EQ(StringRef(""), Toks[1].getHTMLTagCloseName());
> +
> + ASSERT_EQ(tok::text, Toks[2].getKind());
> + ASSERT_EQ(StringRef("@"), Toks[2].getText());
> +
> + ASSERT_EQ(tok::newline, Toks[3].getKind());
> +}
> +
> +TEST_F(CommentLexerTest, HTML12) {
> + const char *Sources[] = {
> + "// </tag",
> + "// </tag>",
> + "// </ tag>",
> + "// </ tag >"
> + };
> +
> + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
> + std::vector<Token> Toks;
> +
> + lexString(Sources[i], Toks);
> +
> + ASSERT_EQ(3U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" "), Toks[0].getText());
> +
> + ASSERT_EQ(tok::html_tag_close, Toks[1].getKind());
> + ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagCloseName());
> +
> + ASSERT_EQ(tok::newline, Toks[2].getKind());
> + }
> +}
> +
> +TEST_F(CommentLexerTest, MultipleComments) {
> + const char *Source =
> + "// Aaa\n"
> + "/// Bbb\n"
> + "/* Ccc\n"
> + " * Ddd*/\n"
> + "/** Eee*/";
> +
> + std::vector<Token> Toks;
> +
> + lexString(Source, Toks);
> +
> + ASSERT_EQ(12U, Toks.size());
> +
> + ASSERT_EQ(tok::text, Toks[0].getKind());
> + ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
> + ASSERT_EQ(tok::newline, Toks[1].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[2].getKind());
> + ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText());
> + ASSERT_EQ(tok::newline, Toks[3].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[4].getKind());
> + ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText());
> + ASSERT_EQ(tok::newline, Toks[5].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[6].getKind());
> + ASSERT_EQ(StringRef(" Ddd"), Toks[6].getText());
> + ASSERT_EQ(tok::newline, Toks[7].getKind());
> + ASSERT_EQ(tok::newline, Toks[8].getKind());
> +
> + ASSERT_EQ(tok::text, Toks[9].getKind());
> + ASSERT_EQ(StringRef(" Eee"), Toks[9].getText());
> +
> + ASSERT_EQ(tok::newline, Toks[10].getKind());
> + ASSERT_EQ(tok::newline, Toks[11].getKind());
> +}
> +
> +} // end namespace comments
> +} // end namespace clang
> +
>
> Added: cfe/trunk/unittests/AST/Makefile
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/AST/Makefile?rev=159223&view=auto
> ==============================================================================
> --- cfe/trunk/unittests/AST/Makefile (added)
> +++ cfe/trunk/unittests/AST/Makefile Tue Jun 26 15:39:18 2012
> @@ -0,0 +1,15 @@
> +##===- unittests/AST/Makefile ------------------------------*- Makefile -*-===##
> +#
> +# The LLVM Compiler Infrastructure
> +#
> +# This file is distributed under the University of Illinois Open Source
> +# License. See LICENSE.TXT for details.
> +#
> +##===----------------------------------------------------------------------===##
> +
> +CLANG_LEVEL = ../..
> +TESTNAME = AST
> +LINK_COMPONENTS := support mc
> +USEDLIBS = clangAST.a clangBasic.a
> +
> +include $(CLANG_LEVEL)/unittests/Makefile
>
> Modified: cfe/trunk/unittests/Makefile
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Makefile?rev=159223&r1=159222&r2=159223&view=diff
> ==============================================================================
> --- cfe/trunk/unittests/Makefile (original)
> +++ cfe/trunk/unittests/Makefile Tue Jun 26 15:39:18 2012
> @@ -14,7 +14,7 @@
>
> IS_UNITTEST_LEVEL := 1
> CLANG_LEVEL := ..
> -PARALLEL_DIRS = Basic Frontend Lex Tooling
> +PARALLEL_DIRS = Basic AST Frontend Lex Tooling
>
> endif # CLANG_LEVEL
>
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
More information about the cfe-commits mailing list