<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On 10 August 2017 at 03:06, Alexander Kornienko via cfe-commits <span dir="ltr">&lt;<a href="mailto:cfe-commits@lists.llvm.org" target="_blank">cfe-commits@lists.llvm.org</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: alexfh<br>
Date: Thu Aug 10 03:06:16 2017<br>
New Revision: 310576<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=310576&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=310576&view=rev</a><br>
Log:<br>
[Lexer] Finding beginning of token with escaped new line<br>
<br>
Summary:<br>
Lexer::GetBeginningOfToken produced invalid location when<br>
backtracking across escaped new lines.<br>
<br>
This fixes PR26228<br>
<br>
Reviewers: akyrtzi, alexfh, rsmith, doug.gregor<br>
<br>
Reviewed By: alexfh<br>
<br>
Subscribers: alexfh, cfe-commits<br>
<br>
Patch by Paweł Żukowski!<br>
<br>
Differential Revision: <a href="https://reviews.llvm.org/D30748" rel="noreferrer" target="_blank">https://reviews.llvm.org/<wbr>D30748</a><br>
<br>
Modified:<br>
    cfe/trunk/include/clang/Lex/<wbr>Lexer.h<br>
    cfe/trunk/lib/Lex/Lexer.cpp<br>
    cfe/trunk/unittests/Lex/<wbr>LexerTest.cpp<br>
<br>
Modified: cfe/trunk/include/clang/Lex/<wbr>Lexer.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Lex/Lexer.h?rev=310576&r1=310575&r2=310576&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/cfe/trunk/include/<wbr>clang/Lex/Lexer.h?rev=310576&<wbr>r1=310575&r2=310576&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- cfe/trunk/include/clang/Lex/<wbr>Lexer.h (original)<br>
+++ cfe/trunk/include/clang/Lex/<wbr>Lexer.h Thu Aug 10 03:06:16 2017<br>
@@ -463,6 +463,10 @@ public:<br>
   /// \brief Returns true if the given character could appear in an identifier.<br>
   static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts);<br>
<br>
+  /// \brief Checks whether new line pointed by Str is preceded by escape<br>
+  /// sequence.<br>
+  static bool isNewLineEscaped(const char *BufferStart, const char *Str);<br>
+<br>
   /// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever<br>
   /// emit a warning.<br>
   static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size,<br>
<br>
Modified: cfe/trunk/lib/Lex/Lexer.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Lex/Lexer.cpp?rev=310576&r1=310575&r2=310576&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/cfe/trunk/lib/Lex/<wbr>Lexer.cpp?rev=310576&r1=<wbr>310575&r2=310576&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- cfe/trunk/lib/Lex/Lexer.cpp (original)<br>
+++ cfe/trunk/lib/Lex/Lexer.cpp Thu Aug 10 03:06:16 2017<br>
@@ -463,19 +463,15 @@ static const char *findBeginningOfLine(S<br>
   const char *BufStart = Buffer.data();<br>
   if (Offset >= Buffer.size())<br>
     return nullptr;<br>
-  const char *StrData = BufStart + Offset;<br>
<br>
-  if (StrData[0] == '\n' || StrData[0] == '\r')<br>
-    return StrData;<br>
-<br>
-  const char *LexStart = StrData;<br>
-  while (LexStart != BufStart) {<br>
-    if (LexStart[0] == '\n' || LexStart[0] == '\r') {<br>
+  const char *LexStart = BufStart + Offset;<br>
+  for (; LexStart != BufStart; --LexStart) {<br>
+    if (isVerticalWhitespace(<wbr>LexStart[0]) &&<br>
+        !Lexer::isNewLineEscaped(<wbr>BufStart, LexStart)) {<br>
+      // LexStart should point at first character of logical line.<br>
       ++LexStart;<br>
       break;<br>
     }<br>
-<br>
-    --LexStart;<br>
   }<br>
   return LexStart;<br>
 }<br>
@@ -487,7 +483,7 @@ static SourceLocation getBeginningOfFile<br>
   std::pair&lt;FileID, unsigned&gt; LocInfo = SM.getDecomposedLoc(Loc);<br>
   if (LocInfo.first.isInvalid())<br>
     return Loc;<br>
-<br>
+<br>
   bool Invalid = false;<br>
   StringRef Buffer = SM.getBufferData(LocInfo.<wbr>first, &Invalid);<br>
   if (Invalid)<br>
@@ -499,31 +495,31 @@ static SourceLocation getBeginningOfFile<br>
   const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second);<br>
   if (!LexStart || LexStart == StrData)<br>
     return Loc;<br>
-<br>
+<br>
   // Create a lexer starting at the beginning of this token.<br>
   SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.<wbr>second);<br>
   Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,<br>
                  Buffer.end());<br>
   TheLexer.<wbr>SetCommentRetentionState(true)<wbr>;<br>
-<br>
+<br>
   // Lex tokens until we find the token that contains the source location.<br>
   Token TheTok;<br>
   do {<br>
     TheLexer.LexFromRawLexer(<wbr>TheTok);<br>
-<br>
+<br>
     if (TheLexer.getBufferLocation() > StrData) {<br>
       // Lexing this token has taken the lexer past the source location we're<br>
       // looking for. If the current token encompasses our source location,<br>
       // return the beginning of that token.<br>
       if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData)<br>
         return TheTok.getLocation();<br>
-<br>
+<br>
       // We ended up skipping over the source location entirely, which means<br>
       // that it points into whitespace. We're done here.<br>
       break;<br>
     }<br>
   } while (TheTok.getKind() != tok::eof);<br>
-<br>
+<br>
   // We've passed our source location; just return the original source location.<br>
   return Loc;<br>
 }<br>
@@ -531,20 +527,20 @@ static SourceLocation getBeginningOfFile<br>
 SourceLocation Lexer::GetBeginningOfToken(<wbr>SourceLocation Loc,<br>
                                           const SourceManager &SM,<br>
                                           const LangOptions &LangOpts) {<br>
- if (Loc.isFileID())<br>
-   return getBeginningOfFileToken(Loc, SM, LangOpts);<br>
-<br>
- if (!SM.isMacroArgExpansion(Loc))<br>
-   return Loc;<br>
-<br>
- SourceLocation FileLoc = SM.getSpellingLoc(Loc);<br>
- SourceLocation BeginFileLoc = getBeginningOfFileToken(<wbr>FileLoc, SM, LangOpts);<br>
- std::pair&lt;FileID, unsigned&gt; FileLocInfo = SM.getDecomposedLoc(FileLoc);<br>
- std::pair&lt;FileID, unsigned&gt; BeginFileLocInfo<br>
-   = SM.getDecomposedLoc(<wbr>BeginFileLoc);<br>
- assert(FileLocInfo.first == BeginFileLocInfo.first &&<br>
-        FileLocInfo.second >= BeginFileLocInfo.second);<br>
- return Loc.getLocWithOffset(<wbr>BeginFileLocInfo.second - FileLocInfo.second);<br>
+  if (Loc.isFileID())<br>
+    return getBeginningOfFileToken(Loc, SM, LangOpts);<br>
+<br>
+  if (!SM.isMacroArgExpansion(Loc))<br>
+    return Loc;<br>
+<br>
+  SourceLocation FileLoc = SM.getSpellingLoc(Loc);<br>
+  SourceLocation BeginFileLoc = getBeginningOfFileToken(<wbr>FileLoc, SM, LangOpts);<br>
+  std::pair&lt;FileID, unsigned&gt; FileLocInfo = SM.getDecomposedLoc(FileLoc);<br>
+  std::pair&lt;FileID, unsigned&gt; BeginFileLocInfo =<br>
+      SM.getDecomposedLoc(<wbr>BeginFileLoc);<br>
+  assert(FileLocInfo.first == BeginFileLocInfo.first &&<br>
+         FileLocInfo.second >= BeginFileLocInfo.second);<br>
+  return Loc.getLocWithOffset(<wbr>BeginFileLocInfo.second - FileLocInfo.second);<br>
 }<br>
<br>
 namespace {<br>
@@ -1032,6 +1028,26 @@ bool Lexer::isIdentifierBodyChar(<wbr>char c,<br>
   return isIdentifierBody(c, LangOpts.DollarIdents);<br>
 }<br>
<br>
+bool Lexer::isNewLineEscaped(const char *BufferStart, const char *Str) {<br>
+  assert(isVerticalWhitespace(<wbr>Str[0]));<br>
+  if (Str - 1 < BufferStart)<br>
+    return false;<br>
+<br>
+  if ((Str[0] == '\n' && Str[-1] == '\r') ||<br>
+      (Str[0] == '\r' && Str[-1] == '\n')) {<br>
+    if (Str - 2 < BufferStart)<br>
+      return false;<br>
+    --Str;<br>
+  }<br>
+  --Str;<br>
+<br>
+  // Rewind to first non-space character:<br>
+  while (Str > BufferStart && isHorizontalWhitespace(*Str))<br>
+    --Str;<br>
+<br>
+  return *Str == '\\';<br></blockquote><div><br></div><div>When trigraphs are enabled, "??/" can also be used to escape a newline.</div><div><br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+}<br>
+<br>
 StringRef Lexer::getIndentationForLine(<wbr>SourceLocation Loc,<br>
                                        const SourceManager &SM) {<br>
   if (Loc.isInvalid() || Loc.isMacroID())<br>
<br>
Modified: cfe/trunk/unittests/Lex/<wbr>LexerTest.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Lex/LexerTest.cpp?rev=310576&r1=310575&r2=310576&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/cfe/trunk/unittests/<wbr>Lex/LexerTest.cpp?rev=310576&<wbr>r1=310575&r2=310576&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- cfe/trunk/unittests/Lex/<wbr>LexerTest.cpp (original)<br>
+++ cfe/trunk/unittests/Lex/<wbr>LexerTest.cpp Thu Aug 10 03:06:16 2017<br>
@@ -420,4 +420,57 @@ TEST_F(LexerTest, DontOverallocateString<br>
 #endif<br>
 }<br>
<br>
+TEST_F(LexerTest, IsNewLineEscapedValid) {<br>
+  auto hasNewLineEscaped = [](const char *S) {<br>
+    return Lexer::isNewLineEscaped(S, S + strlen(S) - 1);<br>
+  };<br>
+<br>
+  EXPECT_TRUE(hasNewLineEscaped(<wbr>"\\\r"));<br>
+  EXPECT_TRUE(hasNewLineEscaped(<wbr>"\\\n"));<br>
+  EXPECT_TRUE(hasNewLineEscaped(<wbr>"\\\r\n"));<br>
+  EXPECT_TRUE(hasNewLineEscaped(<wbr>"\\\n\r"));<br>
+  EXPECT_TRUE(hasNewLineEscaped(<wbr>"\\ \t\v\f\r"));<br>
+  EXPECT_TRUE(hasNewLineEscaped(<wbr>"\\ \t\v\f\r\n"));<br>
+<br>
+  EXPECT_FALSE(<wbr>hasNewLineEscaped("\\\r\r"));<br>
+  EXPECT_FALSE(<wbr>hasNewLineEscaped("\\\r\r\n"))<wbr>;<br>
+  EXPECT_FALSE(<wbr>hasNewLineEscaped("\\\n\n"));<br>
+  EXPECT_FALSE(<wbr>hasNewLineEscaped("\r"));<br>
+  EXPECT_FALSE(<wbr>hasNewLineEscaped("\n"));<br>
+  EXPECT_FALSE(<wbr>hasNewLineEscaped("\r\n"));<br>
+  EXPECT_FALSE(<wbr>hasNewLineEscaped("\n\r"));<br>
+  EXPECT_FALSE(<wbr>hasNewLineEscaped("\r\r"));<br>
+  EXPECT_FALSE(<wbr>hasNewLineEscaped("\n\n"));<br>
+}<br>
+<br>
+TEST_F(LexerTest, GetBeginningOfTokenWithEscaped<wbr>NewLine) {<br>
+  // Each line should have the same length for<br>
+  // further offset calculation to be more straightforward.<br>
+  const unsigned IdentifierLength = 8;<br>
+  std::string TextToLex = "rabarbar\n"<br>
+                          "foo\\\nbar\n"<br>
+                          "foo\\\rbar\n"<br>
+                          "fo\\\r\nbar\n"<br>
+                          "foo\\\n\rba\n";<br>
+  std::vector&lt;tok::TokenKind&gt; ExpectedTokens{5, tok::identifier};<br>
+  std::vector&lt;Token&gt; LexedTokens = CheckLex(TextToLex, ExpectedTokens);<br>
+<br>
+  for (const Token &Tok : LexedTokens) {<br>
+    std::pair&lt;FileID, unsigned&gt; OriginalLocation =<br>
+        SourceMgr.getDecomposedLoc(<wbr>Tok.getLocation());<br>
+    for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {<br>
+      SourceLocation LookupLocation =<br>
+          Tok.getLocation().<wbr>getLocWithOffset(Offset);<br>
+<br>
+      std::pair&lt;FileID, unsigned&gt; FoundLocation =<br>
+          SourceMgr.<wbr>getDecomposedExpansionLoc(<br>
+              Lexer::GetBeginningOfToken(<wbr>LookupLocation, SourceMgr, LangOpts));<br>
+<br>
+      // Check that location returned by the GetBeginningOfToken<br>
+      // is the same as original token location reported by Lexer.<br>
+      EXPECT_EQ(FoundLocation.<wbr>second, OriginalLocation.second);<br>
+    }<br>
+  }<br>
+}<br>
+<br>
 } // anonymous namespace<br>
<br>
<br>
______________________________<wbr>_________________<br>
cfe-commits mailing list<br>
<a href="mailto:cfe-commits@lists.llvm.org">cfe-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/cfe-commits</a><br>
</blockquote></div><br></div></div>