[cfe-commits] r159963 - in /cfe/trunk: lib/AST/CommentLexer.cpp unittests/AST/CommentLexer.cpp
Dmitri Gribenko
gribozavr at gmail.com
Mon Jul 9 14:32:40 PDT 2012
Author: gribozavr
Date: Mon Jul 9 16:32:40 2012
New Revision: 159963
URL: http://llvm.org/viewvc/llvm-project?rev=159963&view=rev
Log:
Comment lexing: fix lexing to actually work in non-error cases.
Modified:
cfe/trunk/lib/AST/CommentLexer.cpp
cfe/trunk/unittests/AST/CommentLexer.cpp
Modified: cfe/trunk/lib/AST/CommentLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/CommentLexer.cpp?rev=159963&r1=159962&r2=159963&view=diff
==============================================================================
--- cfe/trunk/lib/AST/CommentLexer.cpp (original)
+++ cfe/trunk/lib/AST/CommentLexer.cpp Mon Jul 9 16:32:40 2012
@@ -147,6 +147,11 @@
return BufferPtr;
}
+bool isHTMLIdentifierStartingCharacter(char C) {
+ return (C >= 'a' && C <= 'z') ||
+ (C >= 'A' && C <= 'Z');
+}
+
bool isHTMLIdentifierCharacter(char C) {
return (C >= 'a' && C <= 'z') ||
(C >= 'A' && C <= 'Z') ||
@@ -357,7 +362,7 @@
return;
}
const char C = *TokenPtr;
- if (isHTMLIdentifierCharacter(C))
+ if (isHTMLIdentifierStartingCharacter(C))
setupAndLexHTMLOpenTag(T);
else if (C == '/')
setupAndLexHTMLCloseTag(T);
@@ -383,7 +388,7 @@
TokenPtr++;
if (TokenPtr == CommentEnd)
break;
- char C = *TokenPtr;
+ const char C = *TokenPtr;
if(C == '\n' || C == '\r' ||
C == '\\' || C == '@' || C == '<')
break;
@@ -492,7 +497,8 @@
}
void Lexer::setupAndLexHTMLOpenTag(Token &T) {
- assert(BufferPtr[0] == '<' && isHTMLIdentifierCharacter(BufferPtr[1]));
+ assert(BufferPtr[0] == '<' &&
+ isHTMLIdentifierStartingCharacter(BufferPtr[1]));
const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
@@ -501,12 +507,9 @@
BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
- if (BufferPtr != CommentEnd && *BufferPtr == '>') {
- BufferPtr++;
- return;
- }
-
- if (BufferPtr != CommentEnd && isHTMLIdentifierCharacter(*BufferPtr))
+ const char C = *BufferPtr;
+ if (BufferPtr != CommentEnd &&
+ (C == '>' || isHTMLIdentifierStartingCharacter(C)))
State = LS_HTMLOpenTag;
}
@@ -541,7 +544,8 @@
case '>':
TokenPtr++;
formTokenWithChars(T, TokenPtr, tok::html_greater);
- break;
+ State = LS_Normal;
+ return;
}
}
@@ -554,7 +558,7 @@
}
C = *BufferPtr;
- if (!isHTMLIdentifierCharacter(C) &&
+ if (!isHTMLIdentifierStartingCharacter(C) &&
C != '=' && C != '\"' && C != '\'' && C != '>') {
State = LS_Normal;
return;
@@ -656,8 +660,9 @@
EndWhitespace++;
// Turn any whitespace between comments (and there is only whitespace
- // between them) into a newline. We have two newlines between C comments
- // in total (first one was synthesized after a comment).
+ // between them -- guaranteed by comment extraction) into a newline. We
+ // have two newlines between C comments in total (first one was synthesized
+ // after a comment).
formTokenWithChars(T, EndWhitespace, tok::newline);
CommentState = LCS_BeforeComment;
Modified: cfe/trunk/unittests/AST/CommentLexer.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/AST/CommentLexer.cpp?rev=159963&r1=159962&r2=159963&view=diff
==============================================================================
--- cfe/trunk/unittests/AST/CommentLexer.cpp (original)
+++ cfe/trunk/unittests/AST/CommentLexer.cpp Mon Jul 9 16:32:40 2012
@@ -803,6 +803,28 @@
TEST_F(CommentLexerTest, HTML2) {
const char *Source =
+ "// a<2";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(4U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" a"), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("<"), Toks[1].getText());
+
+ ASSERT_EQ(tok::text, Toks[2].getKind());
+ ASSERT_EQ(StringRef("2"), Toks[2].getText());
+
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTML3) {
+ const char *Source =
"// < tag";
std::vector<Token> Toks;
@@ -823,7 +845,7 @@
ASSERT_EQ(tok::newline, Toks[3].getKind());
}
-TEST_F(CommentLexerTest, HTML3) {
+TEST_F(CommentLexerTest, HTML4) {
const char *Sources[] = {
"// <tag",
"// <tag "
@@ -846,7 +868,52 @@
}
}
-TEST_F(CommentLexerTest, HTML4) {
+TEST_F(CommentLexerTest, HTML5) {
+ const char *Source =
+ "// <tag 42";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(4U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
+ ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
+
+ ASSERT_EQ(tok::text, Toks[2].getKind());
+ ASSERT_EQ(StringRef("42"), Toks[2].getText());
+
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTML6) {
+ const char *Source = "// <tag> Meow";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(5U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
+ ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
+
+ ASSERT_EQ(tok::html_greater, Toks[2].getKind());
+
+ ASSERT_EQ(tok::text, Toks[3].getKind());
+ ASSERT_EQ(StringRef(" Meow"), Toks[3].getText());
+
+ ASSERT_EQ(tok::newline, Toks[4].getKind());
+}
+
+TEST_F(CommentLexerTest, HTML7) {
const char *Source = "// <tag=";
std::vector<Token> Toks;
@@ -867,7 +934,35 @@
ASSERT_EQ(tok::newline, Toks[3].getKind());
}
-TEST_F(CommentLexerTest, HTML5) {
+TEST_F(CommentLexerTest, HTML8) {
+ const char *Source = "// <tag attr=> Meow";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(7U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
+ ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
+
+ ASSERT_EQ(tok::html_ident, Toks[2].getKind());
+ ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
+
+ ASSERT_EQ(tok::html_equals, Toks[3].getKind());
+
+ ASSERT_EQ(tok::html_greater, Toks[4].getKind());
+
+ ASSERT_EQ(tok::text, Toks[5].getKind());
+ ASSERT_EQ(StringRef(" Meow"), Toks[5].getText());
+
+ ASSERT_EQ(tok::newline, Toks[6].getKind());
+}
+
+TEST_F(CommentLexerTest, HTML9) {
const char *Sources[] = {
"// <tag attr",
"// <tag attr "
@@ -893,7 +988,7 @@
}
}
-TEST_F(CommentLexerTest, HTML6) {
+TEST_F(CommentLexerTest, HTML10) {
const char *Sources[] = {
"// <tag attr=",
"// <tag attr ="
@@ -921,7 +1016,7 @@
}
}
-TEST_F(CommentLexerTest, HTML7) {
+TEST_F(CommentLexerTest, HTML11) {
const char *Sources[] = {
"// <tag attr=\"",
"// <tag attr = \"",
@@ -954,7 +1049,7 @@
}
}
-TEST_F(CommentLexerTest, HTML8) {
+TEST_F(CommentLexerTest, HTML12) {
const char *Source = "// <tag attr=@";
std::vector<Token> Toks;
@@ -980,7 +1075,7 @@
ASSERT_EQ(tok::newline, Toks[5].getKind());
}
-TEST_F(CommentLexerTest, HTML9) {
+TEST_F(CommentLexerTest, HTML13) {
const char *Sources[] = {
"// <tag attr=\"val\\\"\\'val",
"// <tag attr=\"val\\\"\\'val\"",
@@ -1013,7 +1108,7 @@
}
}
-TEST_F(CommentLexerTest, HTML10) {
+TEST_F(CommentLexerTest, HTML14) {
const char *Sources[] = {
"// <tag attr=\"val\\\"\\'val\">",
"// <tag attr=\'val\\\"\\'val\'>"
@@ -1046,7 +1141,7 @@
}
}
-TEST_F(CommentLexerTest, HTML11) {
+TEST_F(CommentLexerTest, HTML15) {
const char *Source = "// </";
std::vector<Token> Toks;
@@ -1065,7 +1160,7 @@
}
-TEST_F(CommentLexerTest, HTML12) {
+TEST_F(CommentLexerTest, HTML16) {
const char *Source = "// </@";
std::vector<Token> Toks;
@@ -1086,7 +1181,7 @@
ASSERT_EQ(tok::newline, Toks[3].getKind());
}
-TEST_F(CommentLexerTest, HTML13) {
+TEST_F(CommentLexerTest, HTML17) {
const char *Source = "// </tag";
std::vector<Token> Toks;
@@ -1104,7 +1199,7 @@
ASSERT_EQ(tok::newline, Toks[2].getKind());
}
-TEST_F(CommentLexerTest, HTML14) {
+TEST_F(CommentLexerTest, HTML18) {
const char *Sources[] = {
"// </tag>",
"// </ tag>",
More information about the cfe-commits mailing list