[PATCH] Fix block comment parser
Rui Ueyama
ruiu at google.com
Thu May 9 16:18:08 PDT 2013
ruiu added you to the CC list for the revision "Fix block comment parser".
Fixes a bug in the block comment parser. Clang is now able to parse the following block comment:
/* *\
\
/
Before this patch, Clang could skip only one escaped newline between * and /.
http://llvm-reviews.chandlerc.com/D777
Files:
lib/Lex/Lexer.cpp
test/Lexer/block_cmt_end.c
Index: lib/Lex/Lexer.cpp
===================================================================
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -2076,58 +2076,66 @@
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
Lexer *L) {
assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');
+ const char *WhitespacePos = 0; // Whitespace after a backslash, if any.
+ const char *TrigraphPos = 0; // Start of a ??/ trigraph, if any.
- // Back up off the newline.
- --CurPtr;
-
- // If this is a two-character newline sequence, skip the other character.
- if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
- // \n\n or \r\r -> not escaped newline.
- if (CurPtr[0] == CurPtr[1])
- return false;
- // \n\r or \r\n -> skip the newline.
+ // Skip escaped newlines, or return if it's not an escaped newline sequence.
+ do {
+ // Back up off the newline.
--CurPtr;
- }
- // If we have horizontal whitespace, skip over it. We allow whitespace
- // between the slash and newline.
- bool HasSpace = false;
- while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
- --CurPtr;
- HasSpace = true;
- }
+ // If this is a two-character newline sequence, skip the other character.
+ if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
+ // \n\n or \r\r -> not escaped newline.
+ if (CurPtr[0] == CurPtr[1])
+ return false;
+ // \n\r or \r\n -> skip the newline.
+ --CurPtr;
+ }
- // If we have a slash, we know this is an escaped newline.
- if (*CurPtr == '\\') {
- if (CurPtr[-1] != '*') return false;
- } else {
- // It isn't a slash, is it the ?? / trigraph?
- if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||
- CurPtr[-3] != '*')
+ // Skip horizontal whitespace between the backslash and newline, recording
+ // the position of the whitespace itself so the warning points at it.
+ while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
+ WhitespacePos = CurPtr;
+ --CurPtr;
+ }
+
+ // If we have a backslash, skip over it.
+ if (*CurPtr == '\\') {
+ --CurPtr;
+ } else if (CurPtr[0] == '/' && CurPtr[-1] == '?' && CurPtr[-2] == '?') {
+ // It was not a backslash, but the ??/ trigraph equivalent to backslash.
+ TrigraphPos = CurPtr - 2;
+ CurPtr -= 3;
+ } else {
return false;
+ }
+ } while (*CurPtr == '\n' || *CurPtr == '\r');
+
+ // If the character before an escaped newline is not '*', the last
+ // slash was not the end of block comment after all.
+ if (*CurPtr != '*')
+ return false;
- // This is the trigraph ending the comment. Emit a stern warning!
- CurPtr -= 2;
- // If no trigraphs are enabled, warn that we ignored this trigraph and
- // ignore this * character.
+ // If no trigraphs are enabled, warn that we ignored this trigraph and ignore
+ // this * character.
+ if (TrigraphPos) {
if (!L->getLangOpts().Trigraphs) {
if (!L->isLexingRawMode())
- L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
+ L->Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
return false;
}
if (!L->isLexingRawMode())
- L->Diag(CurPtr, diag::trigraph_ends_block_comment);
+ L->Diag(TrigraphPos, diag::trigraph_ends_block_comment);
}
// Warn about having an escaped newline between the */ characters.
if (!L->isLexingRawMode())
L->Diag(CurPtr, diag::escaped_newline_block_comment_end);
-
// If there was space between the backslash and newline, warn about it.
- if (HasSpace && !L->isLexingRawMode())
- L->Diag(CurPtr, diag::backslash_newline_space);
-
+ if (WhitespacePos && !L->isLexingRawMode())
+ L->Diag(WhitespacePos, diag::backslash_newline_space);
return true;
}
Index: test/Lexer/block_cmt_end.c
===================================================================
--- test/Lexer/block_cmt_end.c
+++ test/Lexer/block_cmt_end.c
@@ -3,7 +3,7 @@
RUN: %clang_cc1 -E -trigraphs %s | grep foo
RUN: %clang_cc1 -E -trigraphs %s | not grep qux
RUN: %clang_cc1 -E -trigraphs %s | not grep xyz
- RUN: %clang_cc1 -fsyntax-only -trigraphs -verify %s
+ RUN: %clang_cc1 -fsyntax-only -trigraphs -verify %s
*/
// This is a simple comment, /*/ does not end a comment, the trailing */ does.
@@ -14,21 +14,36 @@
next comment ends with normal escaped newline:
*/
-/* expected-warning {{escaped newline}} expected-warning {{backslash and newline}} *\
+/* expected-warning {{escaped newline}} *\
+\
/
+/* expected-warning@24 {{escaped newline}}
+ expected-warning@25 {{trigraph ends block comment}}
+ expected-warning@26 {{backslash and newline separated by space}}
+*\
+??/
+\
+/
+
+// "*\n/" does not end a comment.
+/* *
+/
+*/
+
+// "* \\\n/" does not end a comment.
+/* * \
+/
+*/
+
int bar /* expected-error {{expected ';' after top level declarator}} */
/* xyz
next comment ends with a trigraph escaped newline: */
-/* expected-warning {{escaped newline between}} expected-warning {{backslash and newline separated by space}} expected-warning {{trigraph ends block comment}} *??/
-/
-
foo
-
// rdar://6060752 - We should not get warnings about trigraphs in comments:
// '????'
/* ???? */
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D777.1.patch
Type: text/x-patch
Size: 5331 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130509/f1855e9b/attachment.bin>
More information about the llvm-commits
mailing list