[clang] de6164e - PR50456: Properly handle multiple escaped newlines in a '*/'.
Richard Smith via cfe-commits
cfe-commits at lists.llvm.org
Mon May 24 16:21:13 PDT 2021
Author: Richard Smith
Date: 2021-05-24T16:21:03-07:00
New Revision: de6164ec4da0cfea1b0d0e472c432ea1be4d9c29
URL: https://github.com/llvm/llvm-project/commit/de6164ec4da0cfea1b0d0e472c432ea1be4d9c29
DIFF: https://github.com/llvm/llvm-project/commit/de6164ec4da0cfea1b0d0e472c432ea1be4d9c29.diff
LOG: PR50456: Properly handle multiple escaped newlines in a '*/'.
Added:
Modified:
clang/lib/Lex/Lexer.cpp
clang/test/Lexer/block_cmt_end.c
Removed:
################################################################################
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 75c0fb65f5b1..d31987a432b8 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2443,56 +2443,70 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
Lexer *L) {
assert(CurPtr[0] == '\n' || CurPtr[0] == '\r');
- // Back up off the newline.
- --CurPtr;
+ // Position of the first trigraph in the ending sequence.
+ const char *TrigraphPos = 0;
+ // Position of the first whitespace after a '\' in the ending sequence.
+ const char *SpacePos = 0;
- // If this is a two-character newline sequence, skip the other character.
- if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
- // \n\n or \r\r -> not escaped newline.
- if (CurPtr[0] == CurPtr[1])
- return false;
- // \n\r or \r\n -> skip the newline.
+ while (true) {
+ // Back up off the newline.
--CurPtr;
- }
- // If we have horizontal whitespace, skip over it. We allow whitespace
- // between the slash and newline.
- bool HasSpace = false;
- while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
- --CurPtr;
- HasSpace = true;
- }
+ // If this is a two-character newline sequence, skip the other character.
+ if (CurPtr[0] == '\n' || CurPtr[0] == '\r') {
+ // \n\n or \r\r -> not escaped newline.
+ if (CurPtr[0] == CurPtr[1])
+ return false;
+ // \n\r or \r\n -> skip the newline.
+ --CurPtr;
+ }
- // If we have a slash, we know this is an escaped newline.
- if (*CurPtr == '\\') {
- if (CurPtr[-1] != '*') return false;
- } else {
- // It isn't a slash, is it the ?? / trigraph?
- if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' ||
- CurPtr[-3] != '*')
+ // If we have horizontal whitespace, skip over it. We allow whitespace
+ // between the slash and newline.
+ while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) {
+ SpacePos = CurPtr;
+ --CurPtr;
+ }
+
+ // If we have a slash, this is an escaped newline.
+ if (*CurPtr == '\\') {
+ --CurPtr;
+ } else if (CurPtr[0] == '/' && CurPtr[-1] == '?' && CurPtr[-2] == '?') {
+ // This is a trigraph encoding of a slash.
+ TrigraphPos = CurPtr - 2;
+ CurPtr -= 3;
+ } else {
return false;
+ }
- // This is the trigraph ending the comment. Emit a stern warning!
- CurPtr -= 2;
+ // If the character preceding the escaped newline is a '*', then after line
+ // splicing we have a '*/' ending the comment.
+ if (*CurPtr == '*')
+ break;
+
+ if (*CurPtr != '\n' && *CurPtr != '\r')
+ return false;
+ }
+ if (TrigraphPos) {
// If no trigraphs are enabled, warn that we ignored this trigraph and
// ignore this * character.
if (!L->getLangOpts().Trigraphs) {
if (!L->isLexingRawMode())
- L->Diag(CurPtr, diag::trigraph_ignored_block_comment);
+ L->Diag(TrigraphPos, diag::trigraph_ignored_block_comment);
return false;
}
if (!L->isLexingRawMode())
- L->Diag(CurPtr, diag::trigraph_ends_block_comment);
+ L->Diag(TrigraphPos, diag::trigraph_ends_block_comment);
}
// Warn about having an escaped newline between the */ characters.
if (!L->isLexingRawMode())
- L->Diag(CurPtr, diag::escaped_newline_block_comment_end);
+ L->Diag(CurPtr + 1, diag::escaped_newline_block_comment_end);
// If there was space between the backslash and newline, warn about it.
- if (HasSpace && !L->isLexingRawMode())
- L->Diag(CurPtr, diag::backslash_newline_space);
+ if (SpacePos && !L->isLexingRawMode())
+ L->Diag(SpacePos, diag::backslash_newline_space);
return true;
}
diff --git a/clang/test/Lexer/block_cmt_end.c b/clang/test/Lexer/block_cmt_end.c
index 1d00137644c3..7d24817042f4 100644
--- a/clang/test/Lexer/block_cmt_end.c
+++ b/clang/test/Lexer/block_cmt_end.c
@@ -32,3 +32,14 @@ foo
// rdar://6060752 - We should not get warnings about trigraphs in comments:
// '????'
/* ???? */
+
+// PR50456: multiple escaped newlines in one */.
+/*
+ *\
+??/
+??/
+\
+/
+// expected-warning@-5 {{escaped newline}}
+// expected-warning@-4 {{separated by space}}
+// expected-warning@-6 {{trigraph ends block comment}}
More information about the cfe-commits
mailing list