[clang] de6164e - PR50456: Properly handle multiple escaped newlines in a '*/'.

Richard Smith via cfe-commits cfe-commits at lists.llvm.org
Mon May 24 16:21:13 PDT 2021


Author: Richard Smith
Date: 2021-05-24T16:21:03-07:00
New Revision: de6164ec4da0cfea1b0d0e472c432ea1be4d9c29

URL: https://github.com/llvm/llvm-project/commit/de6164ec4da0cfea1b0d0e472c432ea1be4d9c29
DIFF: https://github.com/llvm/llvm-project/commit/de6164ec4da0cfea1b0d0e472c432ea1be4d9c29.diff

LOG: PR50456: Properly handle multiple escaped newlines in a '*/'.

Added: 
    

Modified: 
    clang/lib/Lex/Lexer.cpp
    clang/test/Lexer/block_cmt_end.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 75c0fb65f5b1..d31987a432b8 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2443,56 +2443,70 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
                                                   Lexer *L) {
   assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');
 
-  // Back up off the newline.
-  --CurPtr;
+  // Position of the first trigraph in the ending sequence.
+  const char *TrigraphPos = 0;
+  // Position of the first whitespace after a '\' in the ending sequence.
+  const char *SpacePos = 0;
 
-  // If this is a two-character newline sequence, skip the other character.
-  if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
-    // \n\n or \r\r -> not escaped newline.
-    if (CurPtr[0] == CurPtr[1])
-      return false;
-    // \n\r or \r\n -> skip the newline.
+  while (true) {
+    // Back up off the newline.
     --CurPtr;
-  }
 
-  // If we have horizontal whitespace, skip over it.  We allow whitespace
-  // between the slash and newline.
-  bool HasSpace = false;
-  while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
-    --CurPtr;
-    HasSpace = true;
-  }
+    // If this is a two-character newline sequence, skip the other character.
+    if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
+      // \n\n or \r\r -> not escaped newline.
+      if (CurPtr[0] == CurPtr[1])
+        return false;
+      // \n\r or \r\n -> skip the newline.
+      --CurPtr;
+    }
 
-  // If we have a slash, we know this is an escaped newline.
-  if (*CurPtr == '\\') {
-    if (CurPtr[-1] != '*') return false;
-  } else {
-    // It isn't a slash, is it the ?? / trigraph?
-    if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||
-        CurPtr[-3] != '*')
+    // If we have horizontal whitespace, skip over it.  We allow whitespace
+    // between the slash and newline.
+    while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
+      SpacePos = CurPtr;
+      --CurPtr;
+    }
+
+    // If we have a slash, this is an escaped newline.
+    if (*CurPtr == '\\') {
+      --CurPtr;
+    } else if (CurPtr[0] == '/' && CurPtr[-1] == '?' && CurPtr[-2] == '?') {
+      // This is a trigraph encoding of a slash.
+      TrigraphPos = CurPtr - 2;
+      CurPtr -= 3;
+    } else {
       return false;
+    }
 
-    // This is the trigraph ending the comment.  Emit a stern warning!
-    CurPtr -= 2;
+    // If the character preceding the escaped newline is a '*', then after line
+    // splicing we have a '*/' ending the comment.
+    if (*CurPtr == '*')
+      break;
+
+    if (*CurPtr != '\n' && *CurPtr != '\r')
+      return false;
+  }
 
+  if (TrigraphPos) {
     // If no trigraphs are enabled, warn that we ignored this trigraph and
     // ignore this * character.
     if (!L->getLangOpts().Trigraphs) {
       if (!L->isLexingRawMode())
-        L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
+        L->Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
       return false;
     }
     if (!L->isLexingRawMode())
-      L->Diag(CurPtr, diag::trigraph_ends_block_comment);
+      L->Diag(TrigraphPos, diag::trigraph_ends_block_comment);
   }
 
   // Warn about having an escaped newline between the */ characters.
   if (!L->isLexingRawMode())
-    L->Diag(CurPtr, diag::escaped_newline_block_comment_end);
+    L->Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
 
   // If there was space between the backslash and newline, warn about it.
-  if (HasSpace && !L->isLexingRawMode())
-    L->Diag(CurPtr, diag::backslash_newline_space);
+  if (SpacePos && !L->isLexingRawMode())
+    L->Diag(SpacePos, diag::backslash_newline_space);
 
   return true;
 }

diff  --git a/clang/test/Lexer/block_cmt_end.c b/clang/test/Lexer/block_cmt_end.c
index 1d00137644c3..7d24817042f4 100644
--- a/clang/test/Lexer/block_cmt_end.c
+++ b/clang/test/Lexer/block_cmt_end.c
@@ -32,3 +32,14 @@ foo
 // rdar://6060752 - We should not get warnings about trigraphs in comments:
 // '????'
 /* ???? */
+
+// PR50456: multiple escaped newlines in one */.
+/*
+ *\
+??/
+??/  
+\  
+/
+// expected-warning at -5 {{escaped newline}}
+// expected-warning at -4 {{separated by space}}
+// expected-warning at -6 {{trigraph ends block comment}}


        


More information about the cfe-commits mailing list