r189585 - Fixes various problems with accounting for tabs in the original code.

Thu Aug 29 08:21:40 PDT 2013

Author: klimek
Date: Thu Aug 29 10:21:40 2013
New Revision: 189585

URL: http://llvm.org/viewvc/llvm-project?rev=189585&view=rev
Log:
Fixes various problems with accounting for tabs in the original code.

We now count the original token's column directly when lexing the
tokens, where we already have all knowledge about where lines start.

Before this patch, formatting:
 void f() {
 \tg();
 \th();
 }
would incorrectly count the \t's as 1 character if only the line
containing h() was reformatted, and thus indent h() at offset 1.

Modified:
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/lib/Format/FormatToken.h
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=189585&r1=189584&r2=189585&view=diff
==============================================================================

--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Thu Aug 29 10:21:40 2013
@@ -487,11 +487,11 @@ private:
 
 class FormatTokenLexer {
 public:
-  FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr,
+  FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
                    encoding::Encoding Encoding)
-      : FormatTok(NULL), GreaterStashed(false), TrailingWhitespace(0), Lex(Lex),
-        SourceMgr(SourceMgr), IdentTable(getFormattingLangOpts()),
-        Encoding(Encoding) {
+      : FormatTok(NULL), GreaterStashed(false), Column(0),
+        TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
+        IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
     Lex.SetKeepWhitespaceMode(true);
   }
 
@@ -509,6 +509,7 @@ private:
   FormatToken *getNextToken() {
     if (GreaterStashed) {
       // Create a synthesized second '>' token.
+      // FIXME: Increment Column and set OriginalColumn.
       Token Greater = FormatTok->Tok;
       FormatTok = new (Allocator.Allocate()) FormatToken;
       FormatTok->Tok = Greater;
@@ -532,13 +533,29 @@ private:
     // Consume and record whitespace until we find a significant token.
     unsigned WhitespaceLength = TrailingWhitespace;
     while (FormatTok->Tok.is(tok::unknown)) {
-      unsigned Newlines = FormatTok->TokenText.count('\n');
-      if (Newlines > 0)
-        FormatTok->LastNewlineOffset =
-            WhitespaceLength + FormatTok->TokenText.rfind('\n') + 1;
-      FormatTok->NewlinesBefore += Newlines;
-      unsigned EscapedNewlines = FormatTok->TokenText.count("\\\n");
-      FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines;
+      for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
+        switch (FormatTok->TokenText[i]) {
+        case '\n':
+          ++FormatTok->NewlinesBefore;
+          // FIXME: This is technically incorrect, as it could also
+          // be a literal backslash at the end of the line.
+          if (i == 0 || FormatTok->TokenText[i-1] != '\\')
+            FormatTok->HasUnescapedNewline = true;
+          FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
+          Column = 0;
+          break;
+        case ' ':
+          ++Column;
+          break;
+        case '\t':
+          Column += Style.IndentWidth - Column % Style.IndentWidth;
+          break;
+        default:
+          ++Column;
+          break;
+        }
+      }
+
       WhitespaceLength += FormatTok->Tok.getLength();
 
       readRawToken(*FormatTok);
@@ -554,11 +571,14 @@ private:
            FormatTok->TokenText[1] == '\n') {
       // FIXME: ++FormatTok->NewlinesBefore is missing...
       WhitespaceLength += 2;
+      Column = 0;
       FormatTok->TokenText = FormatTok->TokenText.substr(2);
     }
+    FormatTok->OriginalColumn = Column;
 
     TrailingWhitespace = 0;
     if (FormatTok->Tok.is(tok::comment)) {
+      // FIXME: Add the trimmed whitespace to Column.
       StringRef UntrimmedText = FormatTok->TokenText;
       FormatTok->TokenText = FormatTok->TokenText.rtrim();
       TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
@@ -576,6 +596,8 @@ private:
     FormatTok->CodePointCount =
         encoding::getCodePointCount(FormatTok->TokenText, Encoding);
 
+    // FIXME: Add the CodePointCount to Column.
+
     FormatTok->WhitespaceRange = SourceRange(
         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
     return FormatTok;
@@ -583,9 +605,11 @@ private:
 
   FormatToken *FormatTok;
   bool GreaterStashed;
+  unsigned Column;
   unsigned TrailingWhitespace;
   Lexer &Lex;
   SourceManager &SourceMgr;
+  FormatStyle &Style;
   IdentifierTable IdentTable;
   encoding::Encoding Encoding;
   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
@@ -595,7 +619,6 @@ private:
     Lex.LexFromRawLexer(Tok.Tok);
     Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
                               Tok.Tok.getLength());
-
     // For formatting, treat unterminated string literals like normal string
     // literals.
     if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
@@ -622,7 +645,7 @@ public:
   virtual ~Formatter() {}
 
   tooling::Replacements format() {
-    FormatTokenLexer Tokens(Lex, SourceMgr, Encoding);
+    FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
 
     UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
     bool StructuralError = Parser.parse();
@@ -692,9 +715,7 @@ public:
           formatFirstToken(*TheLine.First, PreviousLineLastToken, Indent,
                            TheLine.InPPDirective);
         } else {
-          Indent = LevelIndent =
-              SourceMgr.getSpellingColumnNumber(FirstTok->Tok.getLocation()) -
-              1;
+          Indent = LevelIndent = FirstTok->OriginalColumn;
         }
         ContinuationIndenter Indenter(Style, SourceMgr, TheLine, Indent,
                                       Whitespaces, Encoding,
@@ -727,8 +748,7 @@ public:
              Tok = Tok->Next) {
           if (Tok == TheLine.First &&
               (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
-            unsigned LevelIndent =
-                SourceMgr.getSpellingColumnNumber(Tok->Tok.getLocation()) - 1;
+            unsigned LevelIndent = Tok->OriginalColumn;
             // Remove trailing whitespace of the previous line if it was
             // touched.
             if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) {

Modified: cfe/trunk/lib/Format/FormatToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/FormatToken.h?rev=189585&r1=189584&r2=189585&view=diff
==============================================================================
--- cfe/trunk/lib/Format/FormatToken.h (original)
+++ cfe/trunk/lib/Format/FormatToken.h Thu Aug 29 10:21:40 2013
@@ -169,9 +169,18 @@ struct FormatToken {
   /// \brief If this is an opening parenthesis, how are the parameters packed?
   ParameterPackingKind PackingKind;
 
-  /// \brief The total length of the line up to and including this token.
+  /// \brief The total length of the unwrapped line up to and including this
+  /// token.
   unsigned TotalLength;
 
+  /// \brief The original column of this token, including expanded tabs.
+  /// The configured IndentWidth is used as tab width. Only tabs in whitespace
+  /// are expanded.
+  /// FIXME: This is currently only used on the first token of an unwrapped
+  /// line, and the implementation is not correct for other tokens (see the
+  /// FIXMEs in FormatTokenLexer::getNextToken()).
+  unsigned OriginalColumn;
+
   /// \brief The length of following tokens until the next natural split point,
   /// or the next token that can be broken.
   unsigned UnbreakableTailLength;

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=189585&r1=189584&r2=189585&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Thu Aug 29 10:21:40 2013
@@ -5528,7 +5528,33 @@ TEST_F(FormatTest, ConfigurableUseOfTab)
                "\t\t    parameter2); \\\n"
                "\t}",
                Tab);
-
+  EXPECT_EQ("void f() {\n"
+            "\tf();\n"
+            "\tg();\n"
+            "}",
+            format("void f() {\n"
+                   "\tf();\n"
+                   "\tg();\n"
+                   "}",
+                   0, 0, Tab));
+  EXPECT_EQ("void f() {\n"
+            "\tf();\n"
+            "\tg();\n"
+            "}",
+            format("void f() {\n"
+                   "\tf();\n"
+                   "\tg();\n"
+                   "}",
+                   16, 0, Tab));
+  EXPECT_EQ("void f() {\n"
+            "  \tf();\n"
+            "\tg();\n"
+            "}",
+            format("void f() {\n"
+                   "  \tf();\n"
+                   "  \tg();\n"
+                   "}",
+                   21, 0, Tab));
 
   // FIXME: To correctly count mixed whitespace we need to
   // also correctly count mixed whitespace in front of the comment.