[PATCH] Improved handling of escaped newlines at the start of a token.

Fri Jun 7 10:45:56 PDT 2013

Removed FormatToken::ByteCount (callers now use TokenText.size()) and some redundant code.

Hi klimek,

http://llvm-reviews.chandlerc.com/D935

CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D935?vs=2313&id=2314#toc

BRANCH
  svn

ARCANIST PROJECT
  clang

Files:
  lib/Format/Format.cpp
  lib/Format/FormatToken.h
  unittests/Format/FormatTest.cpp

Index: lib/Format/Format.cpp
===================================================================

--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -1131,7 +1131,7 @@
           FormatTok->Tok.getLocation().getLocWithOffset(1);
       FormatTok->WhitespaceRange =
           SourceRange(GreaterLocation, GreaterLocation);
-      FormatTok->ByteCount = 1;
+      FormatTok->TokenText = ">";
       FormatTok->CodePointCount = 1;
       GreaterStashed = false;
       return FormatTok;
@@ -1151,65 +1151,48 @@
       unsigned Newlines = Text.count('\n');
       if (Newlines > 0)
         FormatTok->LastNewlineOffset = WhitespaceLength + Text.rfind('\n') + 1;
-      unsigned EscapedNewlines = Text.count("\\\n");
       FormatTok->NewlinesBefore += Newlines;
+      unsigned EscapedNewlines = Text.count("\\\n");
       FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines;
       WhitespaceLength += FormatTok->Tok.getLength();
 
-      if (FormatTok->Tok.is(tok::eof)) {
-        FormatTok->WhitespaceRange =
-            SourceRange(WhitespaceStart,
-                        WhitespaceStart.getLocWithOffset(WhitespaceLength));
-        return FormatTok;
-      }
       Lex.LexFromRawLexer(FormatTok->Tok);
       Text = rawTokenText(FormatTok->Tok);
     }
 
-    // Now FormatTok is the next non-whitespace token.
-    FormatTok->ByteCount = Text.size();
-
-    TrailingWhitespace = 0;
-    if (FormatTok->Tok.is(tok::comment)) {
-      TrailingWhitespace = Text.size() - Text.rtrim().size();
-      FormatTok->ByteCount -= TrailingWhitespace;
-    }
-
     // In case the token starts with escaped newlines, we want to
     // take them into account as whitespace - this pattern is quite frequent
     // in macro definitions.
     // FIXME: What do we want to do with other escaped spaces, and escaped
     // spaces or newlines in the middle of tokens?
     // FIXME: Add a more explicit test.
-    unsigned i = 0;
-    while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
+    while (Text.size() > 1 && Text[0] == '\\' && Text[1] == '\n') {
       // FIXME: ++FormatTok->NewlinesBefore is missing...
       WhitespaceLength += 2;
-      FormatTok->ByteCount -= 2;
-      i += 2;
+      Text = Text.substr(2);
     }
 
-    if (FormatTok->Tok.is(tok::raw_identifier)) {
+    TrailingWhitespace = 0;
+    if (FormatTok->Tok.is(tok::comment)) {
+      StringRef UntrimmedText = Text;
+      Text = Text.rtrim();
+      TrailingWhitespace = UntrimmedText.size() - Text.size();
+    } else if (FormatTok->Tok.is(tok::raw_identifier)) {
       IdentifierInfo &Info = IdentTable.get(Text);
       FormatTok->Tok.setIdentifierInfo(&Info);
       FormatTok->Tok.setKind(Info.getTokenID());
-    }
-
-    if (FormatTok->Tok.is(tok::greatergreater)) {
+    } else if (FormatTok->Tok.is(tok::greatergreater)) {
       FormatTok->Tok.setKind(tok::greater);
-      FormatTok->ByteCount = 1;
+      Text = Text.substr(0, 1);
       GreaterStashed = true;
     }
 
-    unsigned EncodingExtraBytes =
-        Text.size() - encoding::getCodePointCount(Text, Encoding);
-    FormatTok->CodePointCount = FormatTok->ByteCount - EncodingExtraBytes;
+    // Now FormatTok is the next non-whitespace token.
+    FormatTok->TokenText = Text;
+    FormatTok->CodePointCount = encoding::getCodePointCount(Text, Encoding);
 
     FormatTok->WhitespaceRange = SourceRange(
         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
-    FormatTok->TokenText = StringRef(
-        SourceMgr.getCharacterData(FormatTok->getStartOfNonWhitespace()),
-        FormatTok->ByteCount);
     return FormatTok;
   }
 
@@ -1587,7 +1570,7 @@
     CharSourceRange LineRange = CharSourceRange::getCharRange(
         First->WhitespaceRange.getBegin().getLocWithOffset(
             First->LastNewlineOffset),
-        Last->Tok.getLocation().getLocWithOffset(Last->ByteCount - 1));
+        Last->Tok.getLocation().getLocWithOffset(Last->TokenText.size() - 1));
     return touchesRanges(LineRange);
   }
 
Index: lib/Format/FormatToken.h
===================================================================
--- lib/Format/FormatToken.h
+++ lib/Format/FormatToken.h
@@ -61,12 +61,11 @@
 struct FormatToken {
   FormatToken()
       : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
-        ByteCount(0), CodePointCount(0), IsFirst(false),
-        MustBreakBefore(false), Type(TT_Unknown), SpacesRequiredBefore(0),
-        CanBreakBefore(false), ClosesTemplateDeclaration(false),
-        ParameterCount(0), TotalLength(0), UnbreakableTailLength(0),
-        BindingStrength(0), SplitPenalty(0), LongestObjCSelectorName(0),
-        FakeRParens(0), LastInChainOfCalls(false),
+        CodePointCount(0), IsFirst(false), MustBreakBefore(false),
+        Type(TT_Unknown), SpacesRequiredBefore(0), CanBreakBefore(false),
+        ClosesTemplateDeclaration(false), ParameterCount(0), TotalLength(0),
+        UnbreakableTailLength(0), BindingStrength(0), SplitPenalty(0),
+        LongestObjCSelectorName(0), FakeRParens(0), LastInChainOfCalls(false),
         PartOfMultiVariableDeclStmt(false), MatchingParen(NULL), Previous(NULL),
         Next(NULL) {}
 
@@ -90,11 +89,6 @@
   /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
   unsigned LastNewlineOffset;
 
-  /// \brief The number of bytes of the non-whitespace parts of the token. This
-  /// is necessary because we need to handle escaped newlines that are stored
-  /// with the token.
-  unsigned ByteCount;
-
   /// \brief The length of the non-whitespace parts of the token in CodePoints.
   /// We need this to correctly measure number of columns a token spans.
   unsigned CodePointCount;
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -1935,10 +1935,11 @@
             format("#define A } }\nint i;", getLLVMStyleWithColumns(11)));
 }
 
-TEST_F(FormatTest, EscapedNewlineAtStartOfTokenInMacroDefinition) {
+TEST_F(FormatTest, EscapedNewlineAtStartOfToken) {
   EXPECT_EQ(
       "#define A \\\n  int i;  \\\n  int j;",
       format("#define A \\\nint i;\\\n  int j;", getLLVMStyleWithColumns(11)));
+  EXPECT_EQ("template <class T> f();", format("\\\ntemplate <class T> f();"));
 }
 
 TEST_F(FormatTest, CalculateSpaceOnConsecutiveLinesInMacro) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D935.2.patch
Type: text/x-patch
Size: 6411 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130607/1ce9d2b5/attachment.bin>