r183536 - Improved handling of escaped newlines at the token start.

Fri Jun 7 10:45:08 PDT 2013

Author: alexfh
Date: Fri Jun  7 12:45:07 2013
New Revision: 183536

URL: http://llvm.org/viewvc/llvm-project?rev=183536&view=rev
Log:
Improved handling of escaped newlines at the token start.

Summary: Remove escaped newlines at the token start from the TokenText as well.

Reviewers: klimek

Reviewed By: klimek

CC: cfe-commits

Differential Revision: http://llvm-reviews.chandlerc.com/D935

Modified:
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/lib/Format/FormatToken.h
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=183536&r1=183535&r2=183536&view=diff
==============================================================================

--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Fri Jun  7 12:45:07 2013
@@ -1131,7 +1131,7 @@ private:
           FormatTok->Tok.getLocation().getLocWithOffset(1);
       FormatTok->WhitespaceRange =
           SourceRange(GreaterLocation, GreaterLocation);
-      FormatTok->ByteCount = 1;
+      FormatTok->TokenText = ">";
       FormatTok->CodePointCount = 1;
       GreaterStashed = false;
       return FormatTok;
@@ -1151,65 +1151,48 @@ private:
       unsigned Newlines = Text.count('\n');
       if (Newlines > 0)
         FormatTok->LastNewlineOffset = WhitespaceLength + Text.rfind('\n') + 1;
-      unsigned EscapedNewlines = Text.count("\\\n");
       FormatTok->NewlinesBefore += Newlines;
+      unsigned EscapedNewlines = Text.count("\\\n");
       FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines;
       WhitespaceLength += FormatTok->Tok.getLength();
 
-      if (FormatTok->Tok.is(tok::eof)) {
-        FormatTok->WhitespaceRange =
-            SourceRange(WhitespaceStart,
-                        WhitespaceStart.getLocWithOffset(WhitespaceLength));
-        return FormatTok;
-      }
       Lex.LexFromRawLexer(FormatTok->Tok);
       Text = rawTokenText(FormatTok->Tok);
     }
 
-    // Now FormatTok is the next non-whitespace token.
-    FormatTok->ByteCount = Text.size();
-
-    TrailingWhitespace = 0;
-    if (FormatTok->Tok.is(tok::comment)) {
-      TrailingWhitespace = Text.size() - Text.rtrim().size();
-      FormatTok->ByteCount -= TrailingWhitespace;
-    }
-
     // In case the token starts with escaped newlines, we want to
     // take them into account as whitespace - this pattern is quite frequent
     // in macro definitions.
     // FIXME: What do we want to do with other escaped spaces, and escaped
     // spaces or newlines in the middle of tokens?
     // FIXME: Add a more explicit test.
-    unsigned i = 0;
-    while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
+    while (Text.size() > 1 && Text[0] == '\\' && Text[1] == '\n') {
       // FIXME: ++FormatTok->NewlinesBefore is missing...
       WhitespaceLength += 2;
-      FormatTok->ByteCount -= 2;
-      i += 2;
+      Text = Text.substr(2);
     }
 
-    if (FormatTok->Tok.is(tok::raw_identifier)) {
+    TrailingWhitespace = 0;
+    if (FormatTok->Tok.is(tok::comment)) {
+      StringRef UntrimmedText = Text;
+      Text = Text.rtrim();
+      TrailingWhitespace = UntrimmedText.size() - Text.size();
+    } else if (FormatTok->Tok.is(tok::raw_identifier)) {
       IdentifierInfo &Info = IdentTable.get(Text);
       FormatTok->Tok.setIdentifierInfo(&Info);
       FormatTok->Tok.setKind(Info.getTokenID());
-    }
-
-    if (FormatTok->Tok.is(tok::greatergreater)) {
+    } else if (FormatTok->Tok.is(tok::greatergreater)) {
       FormatTok->Tok.setKind(tok::greater);
-      FormatTok->ByteCount = 1;
+      Text = Text.substr(0, 1);
       GreaterStashed = true;
     }
 
-    unsigned EncodingExtraBytes =
-        Text.size() - encoding::getCodePointCount(Text, Encoding);
-    FormatTok->CodePointCount = FormatTok->ByteCount - EncodingExtraBytes;
+    // Now FormatTok is the next non-whitespace token.
+    FormatTok->TokenText = Text;
+    FormatTok->CodePointCount = encoding::getCodePointCount(Text, Encoding);
 
     FormatTok->WhitespaceRange = SourceRange(
         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
-    FormatTok->TokenText = StringRef(
-        SourceMgr.getCharacterData(FormatTok->getStartOfNonWhitespace()),
-        FormatTok->ByteCount);
     return FormatTok;
   }
 
@@ -1587,7 +1570,7 @@ private:
     CharSourceRange LineRange = CharSourceRange::getCharRange(
         First->WhitespaceRange.getBegin().getLocWithOffset(
             First->LastNewlineOffset),
-        Last->Tok.getLocation().getLocWithOffset(Last->ByteCount - 1));
+        Last->Tok.getLocation().getLocWithOffset(Last->TokenText.size() - 1));
     return touchesRanges(LineRange);
   }
 

Modified: cfe/trunk/lib/Format/FormatToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatToken.h?rev=183536&r1=183535&r2=183536&view=diff
==============================================================================
--- cfe/trunk/lib/Format/FormatToken.h (original)
+++ cfe/trunk/lib/Format/FormatToken.h Fri Jun  7 12:45:07 2013
@@ -61,12 +61,11 @@ enum TokenType {
 struct FormatToken {
   FormatToken()
       : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
-        ByteCount(0), CodePointCount(0), IsFirst(false),
-        MustBreakBefore(false), Type(TT_Unknown), SpacesRequiredBefore(0),
-        CanBreakBefore(false), ClosesTemplateDeclaration(false),
-        ParameterCount(0), TotalLength(0), UnbreakableTailLength(0),
-        BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0),
-        FakeRParens(0), LastInChainOfCalls(false),
+        CodePointCount(0), IsFirst(false), MustBreakBefore(false),
+        Type(TT_Unknown), SpacesRequiredBefore(0), CanBreakBefore(false),
+        ClosesTemplateDeclaration(false), ParameterCount(0), TotalLength(0),
+        UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0),
+        LongestObjCSelectorName(0), FakeRParens(0), LastInChainOfCalls(false),
         PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL),
         Next(NULL) {}
 
@@ -90,11 +89,6 @@ struct FormatToken {
   /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
   unsigned LastNewlineOffset;
 
-  /// \brief The number of bytes of the non-whitespace parts of the token. This
-  /// is necessary because we need to handle escaped newlines that are stored
-  /// with the token.
-  unsigned ByteCount;
-
   /// \brief The length of the non-whitespace parts of the token in CodePoints.
   /// We need this to correctly measure number of columns a token spans.
   unsigned CodePointCount;

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=183536&r1=183535&r2=183536&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Fri Jun  7 12:45:07 2013
@@ -1935,10 +1935,11 @@ TEST_F(FormatTest, FormatUnbalancedStruc
             format("#define A } }\nint i;", getLLVMStyleWithColumns(11)));
 }
 
-TEST_F(FormatTest, EscapedNewlineAtStartOfTokenInMacroDefinition) {
+TEST_F(FormatTest, EscapedNewlineAtStartOfToken) {
   EXPECT_EQ(
       "#define A \\\n  int i;  \\\n  int j;",
       format("#define A \\\nint i;\\\n  int j;", getLLVMStyleWithColumns(11)));
+  EXPECT_EQ("template <class T> f();", format("\\\ntemplate <class T> f();"));
 }
 
 TEST_F(FormatTest, CalculateSpaceOnConsecutiveLinesInMacro) {