[PATCH] Fix block comment parser
Rui Ueyama
ruiu at google.com
Wed Jun 12 12:47:00 PDT 2013
Ping.
On Wed, Jun 12, 2013 at 12:45 PM, Rui Ueyama <ruiu at google.com> wrote:
> ruiu added you to the CC list for the revision "Fix block comment parser".
>
> Fixes a bug in block comment parser. Clang is now able to parse the
> following block comment.
>
> /* *\
> \
> /
>
> Before this patch, Clang could skip only one escaped newline between * and
> /.
>
> http://llvm-reviews.chandlerc.com/D777
>
> Files:
> lib/Lex/Lexer.cpp
> test/Lexer/block_cmt_end.c
>
> Index: lib/Lex/Lexer.cpp
> ===================================================================
> --- lib/Lex/Lexer.cpp
> +++ lib/Lex/Lexer.cpp
> @@ -2076,58 +2076,66 @@
> static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
> Lexer *L) {
> assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');
> + const char *WhitespacePos = 0;
> + const char *TrigraphPos = 0;
>
> - // Back up off the newline.
> - --CurPtr;
> -
> - // If this is a two-character newline sequence, skip the other
> character.
> - if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
> - // \n\n or \r\r -> not escaped newline.
> - if (CurPtr[0] == CurPtr[1])
> - return false;
> - // \n\r or \r\n -> skip the newline.
> + // Skip escaped newlines, or return if it's not an escaped newline
> sequence.
> + do {
> + // Back up off the newline.
> --CurPtr;
> - }
>
> - // If we have horizontal whitespace, skip over it. We allow whitespace
> - // between the slash and newline.
> - bool HasSpace = false;
> - while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
> - --CurPtr;
> - HasSpace = true;
> - }
> + // If this is a two-character newline sequence, skip the other
> character.
> + if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
> + // \n\n or \r\r -> not escaped newline.
> + if (CurPtr[0] == CurPtr[1])
> + return false;
> + // \n\r or \r\n -> skip the newline.
> + --CurPtr;
> + }
>
> - // If we have a slash, we know this is an escaped newline.
> - if (*CurPtr == '\\') {
> - if (CurPtr[-1] != '*') return false;
> - } else {
> - // It isn't a slash, is it the ?? / trigraph?
> - if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||
> - CurPtr[-3] != '*')
> + // If we have horizontal whitespace, skip over it. We allow
> whitespace
> + // between the backslash and newline.
> + while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
> + WhitespacePos = CurPtr;
> + --CurPtr;
> + }
> +
> + // If we have a backslash, skip over it.
> + if (*CurPtr == '\\') {
> + --CurPtr;
> + } else if (CurPtr[0] == '/' || CurPtr[-1] == '?' || CurPtr[-2] ==
> '?') {
> + // It was not a backslash, but the trigraph equivalent to backslash.
> + TrigraphPos = CurPtr - 2;
> + CurPtr -= 3;
> + } else {
> return false;
> + }
> + } while (*CurPtr == '\n' || *CurPtr == '\r');
>
> - // This is the trigraph ending the comment. Emit a stern warning!
> - CurPtr -= 2;
> + // If the character before an escaped newline is not '*', the last
> + // slash was not the end of block comment after all.
> + if (*CurPtr != '*')
> + return false;
>
> - // If no trigraphs are enabled, warn that we ignored this trigraph and
> - // ignore this * character.
> + // If no trigraphs are enabled, warn that we ignored this trigraph and
> ignore
> + // this * character.
> + if (TrigraphPos) {
> if (!L->getLangOpts().Trigraphs) {
> if (!L->isLexingRawMode())
> - L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
> + L->Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
> return false;
> }
> if (!L->isLexingRawMode())
> - L->Diag(CurPtr, diag::trigraph_ends_block_comment);
> + L->Diag(TrigraphPos, diag::trigraph_ends_block_comment);
> }
>
> // Warn about having an escaped newline between the */ characters.
> if (!L->isLexingRawMode())
> L->Diag(CurPtr, diag::escaped_newline_block_comment_end);
>
> // If there was space between the backslash and newline, warn about it.
> - if (HasSpace && !L->isLexingRawMode())
> - L->Diag(CurPtr, diag::backslash_newline_space);
> -
> + if (WhitespacePos && !L->isLexingRawMode())
> + L->Diag(WhitespacePos, diag::backslash_newline_space);
> return true;
> }
>
> Index: test/Lexer/block_cmt_end.c
> ===================================================================
> --- test/Lexer/block_cmt_end.c
> +++ test/Lexer/block_cmt_end.c
> @@ -3,7 +3,7 @@
> RUN: %clang_cc1 -E -trigraphs %s | grep foo
> RUN: %clang_cc1 -E -trigraphs %s | not grep qux
> RUN: %clang_cc1 -E -trigraphs %s | not grep xyz
> - RUN: %clang_cc1 -fsyntax-only -trigraphs -verify %s
> + RUN: %clang_cc1 -fsyntax-only -trigraphs -verify %s
> */
>
> // This is a simple comment, /*/ does not end a comment, the trailing */
> does.
> @@ -14,21 +14,36 @@
> next comment ends with normal escaped newline:
> */
>
> -/* expected-warning {{escaped newline}} expected-warning {{backslash and
> newline}} *\
> +/* expected-warning {{escaped newline}} *\
> +\
> /
>
> +/* expected-warning at 24 {{escaped newline}}
> + expected-warning at 25 {{trigraph ends block comment}}
> + expected-warning at 26 {{backslash and newline separated by space}}
> +*\
> +??/
> +\
> +/
> +
> +// "*\n/" does not end a comment.
> +/* *
> +/
> +*/
> +
> +// "* \\\n/" does not end a comment.
> +/* * \
> +/
> +*/
> +
> int bar /* expected-error {{expected ';' after top level declarator}} */
>
> /* xyz
>
> next comment ends with a trigraph escaped newline: */
>
> -/* expected-warning {{escaped newline between}} expected-warning
> {{backslash and newline separated by space}} expected-warning {{trigraph
> ends block comment}} *??/
> -/
> -
> foo
>
> -
> // rdar://6060752 - We should not get warnings about trigraphs in
> comments:
> // '????'
> /* ???? */
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130612/5618756b/attachment.html>
More information about the cfe-commits
mailing list