<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On 10 August 2017 at 03:06, Alexander Kornienko via cfe-commits <span dir="ltr">&lt;<a href="mailto:cfe-commits@lists.llvm.org" target="_blank">cfe-commits@lists.llvm.org</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: alexfh<br>
Date: Thu Aug 10 03:06:16 2017<br>
New Revision: 310576<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=310576&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=310576&view=rev</a><br>
Log:<br>
[Lexer] Finding beginning of token with escaped new line<br>
<br>
Summary:<br>
Lexer::GetBeginningOfToken produced invalid location when<br>
backtracking across escaped new lines.<br>
<br>
This fixes PR26228<br>
<br>
Reviewers: akyrtzi, alexfh, rsmith, doug.gregor<br>
<br>
Reviewed By: alexfh<br>
<br>
Subscribers: alexfh, cfe-commits<br>
<br>
Patch by Paweł Żukowski!<br>
<br>
Differential Revision: <a href="https://reviews.llvm.org/D30748" rel="noreferrer" target="_blank">https://reviews.llvm.org/<wbr>D30748</a><br>
<br>
Modified:<br>
cfe/trunk/include/clang/Lex/<wbr>Lexer.h<br>
cfe/trunk/lib/Lex/Lexer.cpp<br>
cfe/trunk/unittests/Lex/<wbr>LexerTest.cpp<br>
<br>
Modified: cfe/trunk/include/clang/Lex/<wbr>Lexer.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/Lexer.h?rev=310576&r1=310575&r2=310576&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/cfe/trunk/include/<wbr>clang/Lex/Lexer.h?rev=310576&<wbr>r1=310575&r2=310576&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- cfe/trunk/include/clang/Lex/<wbr>Lexer.h (original)<br>
+++ cfe/trunk/include/clang/Lex/<wbr>Lexer.h Thu Aug 10 03:06:16 2017<br>
@@ -463,6 +463,10 @@ public:<br>
/// \brief Returns true if the given character could appear in an identifier.<br>
static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts);<br>
<br>
+ /// \brief Checks whether new line pointed by Str is preceded by escape<br>
+ /// sequence.<br>
+ static bool isNewLineEscaped(const char *BufferStart, const char *Str);<br>
+<br>
/// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever<br>
/// emit a warning.<br>
static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size,<br>
<br>
Modified: cfe/trunk/lib/Lex/Lexer.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Lexer.cpp?rev=310576&r1=310575&r2=310576&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/cfe/trunk/lib/Lex/<wbr>Lexer.cpp?rev=310576&r1=<wbr>310575&r2=310576&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- cfe/trunk/lib/Lex/Lexer.cpp (original)<br>
+++ cfe/trunk/lib/Lex/Lexer.cpp Thu Aug 10 03:06:16 2017<br>
@@ -463,19 +463,15 @@ static const char *findBeginningOfLine(S<br>
const char *BufStart = Buffer.data();<br>
if (Offset >= Buffer.size())<br>
return nullptr;<br>
- const char *StrData = BufStart + Offset;<br>
<br>
- if (StrData[0] == '\n' || StrData[0] == '\r')<br>
- return StrData;<br>
-<br>
- const char *LexStart = StrData;<br>
- while (LexStart != BufStart) {<br>
- if (LexStart[0] == '\n' || LexStart[0] == '\r') {<br>
+ const char *LexStart = BufStart + Offset;<br>
+ for (; LexStart != BufStart; --LexStart) {<br>
+ if (isVerticalWhitespace(<wbr>LexStart[0]) &&<br>
+ !Lexer::isNewLineEscaped(<wbr>BufStart, LexStart)) {<br>
+ // LexStart should point at first character of logical line.<br>
++LexStart;<br>
break;<br>
}<br>
-<br>
- --LexStart;<br>
}<br>
return LexStart;<br>
}<br>
@@ -487,7 +483,7 @@ static SourceLocation getBeginningOfFile<br>
std::pair&lt;FileID, unsigned&gt; LocInfo = SM.getDecomposedLoc(Loc);<br>
if (LocInfo.first.isInvalid())<br>
return Loc;<br>
-<br>
+<br>
bool Invalid = false;<br>
StringRef Buffer = SM.getBufferData(LocInfo.<wbr>first, &Invalid);<br>
if (Invalid)<br>
@@ -499,31 +495,31 @@ static SourceLocation getBeginningOfFile<br>
const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second);<br>
if (!LexStart || LexStart == StrData)<br>
return Loc;<br>
-<br>
+<br>
// Create a lexer starting at the beginning of this token.<br>
SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.<wbr>second);<br>
Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,<br>
Buffer.end());<br>
TheLexer.<wbr>SetCommentRetentionState(true)<wbr>;<br>
-<br>
+<br>
// Lex tokens until we find the token that contains the source location.<br>
Token TheTok;<br>
do {<br>
TheLexer.LexFromRawLexer(<wbr>TheTok);<br>
-<br>
+<br>
if (TheLexer.getBufferLocation() > StrData) {<br>
// Lexing this token has taken the lexer past the source location we're<br>
// looking for. If the current token encompasses our source location,<br>
// return the beginning of that token.<br>
if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData)<br>
return TheTok.getLocation();<br>
-<br>
+<br>
// We ended up skipping over the source location entirely, which means<br>
// that it points into whitespace. We're done here.<br>
break;<br>
}<br>
} while (TheTok.getKind() != tok::eof);<br>
-<br>
+<br>
// We've passed our source location; just return the original source location.<br>
return Loc;<br>
}<br>
@@ -531,20 +527,20 @@ static SourceLocation getBeginningOfFile<br>
SourceLocation Lexer::GetBeginningOfToken(<wbr>SourceLocation Loc,<br>
const SourceManager &SM,<br>
const LangOptions &LangOpts) {<br>
- if (Loc.isFileID())<br>
- return getBeginningOfFileToken(Loc, SM, LangOpts);<br>
-<br>
- if (!SM.isMacroArgExpansion(Loc))<br>
- return Loc;<br>
-<br>
- SourceLocation FileLoc = SM.getSpellingLoc(Loc);<br>
- SourceLocation BeginFileLoc = getBeginningOfFileToken(<wbr>FileLoc, SM, LangOpts);<br>
- std::pair&lt;FileID, unsigned&gt; FileLocInfo = SM.getDecomposedLoc(FileLoc);<br>
- std::pair&lt;FileID, unsigned&gt; BeginFileLocInfo<br>
- = SM.getDecomposedLoc(<wbr>BeginFileLoc);<br>
- assert(FileLocInfo.first == BeginFileLocInfo.first &&<br>
- FileLocInfo.second >= BeginFileLocInfo.second);<br>
- return Loc.getLocWithOffset(<wbr>BeginFileLocInfo.second - FileLocInfo.second);<br>
+ if (Loc.isFileID())<br>
+ return getBeginningOfFileToken(Loc, SM, LangOpts);<br>
+<br>
+ if (!SM.isMacroArgExpansion(Loc))<br>
+ return Loc;<br>
+<br>
+ SourceLocation FileLoc = SM.getSpellingLoc(Loc);<br>
+ SourceLocation BeginFileLoc = getBeginningOfFileToken(<wbr>FileLoc, SM, LangOpts);<br>
+ std::pair&lt;FileID, unsigned&gt; FileLocInfo = SM.getDecomposedLoc(FileLoc);<br>
+ std::pair&lt;FileID, unsigned&gt; BeginFileLocInfo =<br>
+ SM.getDecomposedLoc(<wbr>BeginFileLoc);<br>
+ assert(FileLocInfo.first == BeginFileLocInfo.first &&<br>
+ FileLocInfo.second >= BeginFileLocInfo.second);<br>
+ return Loc.getLocWithOffset(<wbr>BeginFileLocInfo.second - FileLocInfo.second);<br>
}<br>
<br>
namespace {<br>
@@ -1032,6 +1028,26 @@ bool Lexer::isIdentifierBodyChar(<wbr>char c,<br>
return isIdentifierBody(c, LangOpts.DollarIdents);<br>
}<br>
<br>
+bool Lexer::isNewLineEscaped(const char *BufferStart, const char *Str) {<br>
+ assert(isVerticalWhitespace(<wbr>Str[0]));<br>
+ if (Str - 1 < BufferStart)<br>
+ return false;<br>
+<br>
+ if ((Str[0] == '\n' && Str[-1] == '\r') ||<br>
+ (Str[0] == '\r' && Str[-1] == '\n')) {<br>
+ if (Str - 2 < BufferStart)<br>
+ return false;<br>
+ --Str;<br>
+ }<br>
+ --Str;<br>
+<br>
+ // Rewind to first non-space character:<br>
+ while (Str > BufferStart && isHorizontalWhitespace(*Str))<br>
+ --Str;<br>
+<br>
+ return *Str == '\\';<br></blockquote><div><br></div><div>When trigraphs are enabled, "??/" can also be used to escape a newline.</div><div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+}<br>
+<br>
StringRef Lexer::getIndentationForLine(<wbr>SourceLocation Loc,<br>
const SourceManager &SM) {<br>
if (Loc.isInvalid() || Loc.isMacroID())<br>
<br>
Modified: cfe/trunk/unittests/Lex/<wbr>LexerTest.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Lex/LexerTest.cpp?rev=310576&r1=310575&r2=310576&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/cfe/trunk/unittests/<wbr>Lex/LexerTest.cpp?rev=310576&<wbr>r1=310575&r2=310576&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- cfe/trunk/unittests/Lex/<wbr>LexerTest.cpp (original)<br>
+++ cfe/trunk/unittests/Lex/<wbr>LexerTest.cpp Thu Aug 10 03:06:16 2017<br>
@@ -420,4 +420,57 @@ TEST_F(LexerTest, DontOverallocateString<br>
#endif<br>
}<br>
<br>
+TEST_F(LexerTest, IsNewLineEscapedValid) {<br>
+ auto hasNewLineEscaped = [](const char *S) {<br>
+ return Lexer::isNewLineEscaped(S, S + strlen(S) - 1);<br>
+ };<br>
+<br>
+ EXPECT_TRUE(hasNewLineEscaped(<wbr>"\\\r"));<br>
+ EXPECT_TRUE(hasNewLineEscaped(<wbr>"\\\n"));<br>
+ EXPECT_TRUE(hasNewLineEscaped(<wbr>"\\\r\n"));<br>
+ EXPECT_TRUE(hasNewLineEscaped(<wbr>"\\\n\r"));<br>
+ EXPECT_TRUE(hasNewLineEscaped(<wbr>"\\ \t\v\f\r"));<br>
+ EXPECT_TRUE(hasNewLineEscaped(<wbr>"\\ \t\v\f\r\n"));<br>
+<br>
+ EXPECT_FALSE(<wbr>hasNewLineEscaped("\\\r\r"));<br>
+ EXPECT_FALSE(<wbr>hasNewLineEscaped("\\\r\r\n"))<wbr>;<br>
+ EXPECT_FALSE(<wbr>hasNewLineEscaped("\\\n\n"));<br>
+ EXPECT_FALSE(<wbr>hasNewLineEscaped("\r"));<br>
+ EXPECT_FALSE(<wbr>hasNewLineEscaped("\n"));<br>
+ EXPECT_FALSE(<wbr>hasNewLineEscaped("\r\n"));<br>
+ EXPECT_FALSE(<wbr>hasNewLineEscaped("\n\r"));<br>
+ EXPECT_FALSE(<wbr>hasNewLineEscaped("\r\r"));<br>
+ EXPECT_FALSE(<wbr>hasNewLineEscaped("\n\n"));<br>
+}<br>
+<br>
+TEST_F(LexerTest, GetBeginningOfTokenWithEscaped<wbr>NewLine) {<br>
+ // Each line should have the same length for<br>
+ // further offset calculation to be more straightforward.<br>
+ const unsigned IdentifierLength = 8;<br>
+ std::string TextToLex = "rabarbar\n"<br>
+ "foo\\\nbar\n"<br>
+ "foo\\\rbar\n"<br>
+ "fo\\\r\nbar\n"<br>
+ "foo\\\n\rba\n";<br>
+ std::vector&lt;tok::TokenKind&gt; ExpectedTokens{5, tok::identifier};<br>
+ std::vector&lt;Token&gt; LexedTokens = CheckLex(TextToLex, ExpectedTokens);<br>
+<br>
+ for (const Token &Tok : LexedTokens) {<br>
+ std::pair&lt;FileID, unsigned&gt; OriginalLocation =<br>
+ SourceMgr.getDecomposedLoc(<wbr>Tok.getLocation());<br>
+ for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {<br>
+ SourceLocation LookupLocation =<br>
+ Tok.getLocation().<wbr>getLocWithOffset(Offset);<br>
+<br>
+ std::pair&lt;FileID, unsigned&gt; FoundLocation =<br>
+ SourceMgr.<wbr>getDecomposedExpansionLoc(<br>
+ Lexer::GetBeginningOfToken(<wbr>LookupLocation, SourceMgr, LangOpts));<br>
+<br>
+ // Check that location returned by the GetBeginningOfToken<br>
+ // is the same as original token location reported by Lexer.<br>
+ EXPECT_EQ(FoundLocation.<wbr>second, OriginalLocation.second);<br>
+ }<br>
+ }<br>
+}<br>
+<br>
} // anonymous namespace<br>
<br>
<br>
______________________________<wbr>_________________<br>
cfe-commits mailing list<br>
<a href="mailto:cfe-commits@lists.llvm.org">cfe-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/cfe-commits</a><br>
</blockquote></div><br></div></div>