[PATCH] Fix block comment parser

Rui Ueyama ruiu at google.com
Wed Jun 12 12:45:05 PDT 2013


ruiu added you to the CC list for the revision "Fix block comment parser".

Fixes a bug in the block comment parser. Clang is now able to parse the following block comment:

/* *\
\
/

Before this patch, Clang could skip only one escaped newline between * and /.

http://llvm-reviews.chandlerc.com/D777

Files:
  lib/Lex/Lexer.cpp
  test/Lexer/block_cmt_end.c

Index: lib/Lex/Lexer.cpp
===================================================================
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -2076,58 +2076,66 @@
 static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
                                                   Lexer *L) {
   assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');
+  const char *WhitespacePos = 0;
+  const char *TrigraphPos = 0;
 
-  // Back up off the newline.
-  --CurPtr;
-
-  // If this is a two-character newline sequence, skip the other character.
-  if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
-    // \n\n or \r\r -> not escaped newline.
-    if (CurPtr[0] == CurPtr[1])
-      return false;
-    // \n\r or \r\n -> skip the newline.
+  // Skip escaped newlines, or return if it's not an escaped newline sequence.
+  do {
+    // Back up off the newline.
     --CurPtr;
-  }
 
-  // If we have horizontal whitespace, skip over it.  We allow whitespace
-  // between the slash and newline.
-  bool HasSpace = false;
-  while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
-    --CurPtr;
-    HasSpace = true;
-  }
+    // If this is a two-character newline sequence, skip the other character.
+    if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
+      // \n\n or \r\r -> not escaped newline.
+      if (CurPtr[0] == CurPtr[1])
+        return false;
+      // \n\r or \r\n -> skip the newline.
+      --CurPtr;
+    }
 
-  // If we have a slash, we know this is an escaped newline.
-  if (*CurPtr == '\\') {
-    if (CurPtr[-1] != '*') return false;
-  } else {
-    // It isn't a slash, is it the ?? / trigraph?
-    if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||
-        CurPtr[-3] != '*')
+    // If we have horizontal whitespace, skip over it.  We allow whitespace
+    // between the backslash and newline.
+    while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
+      WhitespacePos = CurPtr;
+      --CurPtr;
+    }
+
+    // If we have a backslash, skip over it.
+    if (*CurPtr == '\\') {
+      --CurPtr;
+    } else if (CurPtr[0] == '/' && CurPtr[-1] == '?' && CurPtr[-2] == '?') {
+      // Not a backslash: require every character of the ?? / trigraph.
+      TrigraphPos = CurPtr - 2;
+      CurPtr -= 3;
+    } else {
       return false;
+    }
+  } while (*CurPtr == '\n' || *CurPtr == '\r');
 
-    // This is the trigraph ending the comment.  Emit a stern warning!
-    CurPtr -= 2;
+  // If the character before an escaped newline is not '*', the last
+  // slash was not the end of block comment after all.
+  if (*CurPtr != '*')
+    return false;
 
-    // If no trigraphs are enabled, warn that we ignored this trigraph and
-    // ignore this * character.
+  // If no trigraphs are enabled, warn that we ignored this trigraph and ignore
+  // this * character.
+  if (TrigraphPos) {
     if (!L->getLangOpts().Trigraphs) {
       if (!L->isLexingRawMode())
-        L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
+        L->Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
       return false;
     }
     if (!L->isLexingRawMode())
-      L->Diag(CurPtr, diag::trigraph_ends_block_comment);
+      L->Diag(TrigraphPos, diag::trigraph_ends_block_comment);
   }
 
   // Warn about having an escaped newline between the */ characters.
   if (!L->isLexingRawMode())
     L->Diag(CurPtr, diag::escaped_newline_block_comment_end);
 
   // If there was space between the backslash and newline, warn about it.
-  if (HasSpace && !L->isLexingRawMode())
-    L->Diag(CurPtr, diag::backslash_newline_space);
-
+  if (WhitespacePos && !L->isLexingRawMode())
+    L->Diag(WhitespacePos, diag::backslash_newline_space);
   return true;
 }
 
Index: test/Lexer/block_cmt_end.c
===================================================================
--- test/Lexer/block_cmt_end.c
+++ test/Lexer/block_cmt_end.c
@@ -3,7 +3,7 @@
   RUN: %clang_cc1 -E -trigraphs %s | grep foo
   RUN: %clang_cc1 -E -trigraphs %s | not grep qux
   RUN: %clang_cc1 -E -trigraphs %s | not grep xyz
-  RUN: %clang_cc1 -fsyntax-only -trigraphs -verify %s  
+  RUN: %clang_cc1 -fsyntax-only -trigraphs -verify %s
 */
 
 // This is a simple comment, /*/ does not end a comment, the trailing */ does.
@@ -14,21 +14,36 @@
 next comment ends with normal escaped newline:
 */
 
-/* expected-warning {{escaped newline}} expected-warning {{backslash and newline}}  *\  
+/* expected-warning {{escaped newline}} *\
+\
 /
 
+/* expected-warning@24 {{escaped newline}}
+   expected-warning@25 {{trigraph ends block comment}}
+   expected-warning@26 {{backslash and newline separated by space}}
+*\
+??/
+\  
+/
+
+// "*\n/" does not end a comment.
+/* *
+/
+*/
+
+// "* \\\n/" does not end a comment.
+/* * \
+/
+*/
+
 int bar /* expected-error {{expected ';' after top level declarator}} */
 
 /* xyz
 
 next comment ends with a trigraph escaped newline: */
 
-/* expected-warning {{escaped newline between}}   expected-warning {{backslash and newline separated by space}}    expected-warning {{trigraph ends block comment}}   *??/    
-/
-
 foo
 
-
 // rdar://6060752 - We should not get warnings about trigraphs in comments:
 // '????'
 /* ???? */
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D777.2.patch
Type: text/x-patch
Size: 5166 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130612/08df2219/attachment.bin>


More information about the cfe-commits mailing list