[PATCH] Correctly calculate OriginalColumn after multi-line tokens.

Alexander Kornienko alexfh at google.com
Tue Sep 10 05:13:59 PDT 2013


Hi djasper,

This also unifies the handling of escaped newlines for all tokens.

http://llvm-reviews.chandlerc.com/D1638

Files:
  lib/Format/ContinuationIndenter.cpp
  lib/Format/ContinuationIndenter.h
  lib/Format/Format.cpp
  unittests/Format/FormatTest.cpp

Index: lib/Format/ContinuationIndenter.cpp
===================================================================
--- lib/Format/ContinuationIndenter.cpp
+++ lib/Format/ContinuationIndenter.cpp
@@ -611,9 +611,8 @@
   return Penalty;
 }
 
-unsigned
-ContinuationIndenter::addMultilineStringLiteral(const FormatToken &Current,
-                                                LineState &State) {
+unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
+                                                 LineState &State) {
   // Break before further function parameters on all levels.
   for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
     State.Stack[i].BreakBeforeParameter = true;
@@ -631,20 +630,19 @@
 unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
                                                     LineState &State,
                                                     bool DryRun) {
+  if (Current.Type != TT_BlockComment && Current.IsMultiline) {
+    // Don't break multi-line tokens other than block comments.
+    return addMultilineToken(Current, State);
+  }
+
   if (!Current.isOneOf(tok::string_literal, tok::comment))
     return 0;
 
   llvm::OwningPtr<BreakableToken> Token;
   unsigned StartColumn = State.Column - Current.ColumnWidth;
 
   if (Current.is(tok::string_literal) &&
       Current.Type != TT_ImplicitStringLiteral) {
-    // Don't break string literals with (in case of non-raw strings, escaped)
-    // newlines. As clang-format must not change the string's content, it is
-    // unlikely that we'll end up with a better format.
-    if (Current.IsMultiline)
-      return addMultilineStringLiteral(Current, State);
-
     // Only break up default narrow strings.
     if (!Current.TokenText.startswith("\""))
       return 0;
@@ -662,16 +660,6 @@
   } else if (Current.Type == TT_LineComment &&
              (Current.Previous == NULL ||
               Current.Previous->Type != TT_ImplicitStringLiteral)) {
-    // Don't break line comments with escaped newlines. These look like
-    // separate line comments, but in fact contain a single line comment with
-    // multiple lines including leading whitespace and the '//' markers.
-    //
-    // FIXME: If we want to handle them correctly, we'll need to adjust
-    // leading whitespace in consecutive lines when changing indentation of
-    // the first line similar to what we do with block comments.
-    if (Current.IsMultiline)
-      return 0;
-
     Token.reset(new BreakableLineComment(
         Current, StartColumn, State.Line->InPPDirective, Encoding, Style));
   } else {
Index: lib/Format/ContinuationIndenter.h
===================================================================
--- lib/Format/ContinuationIndenter.h
+++ lib/Format/ContinuationIndenter.h
@@ -84,13 +84,12 @@
   unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
                                 bool DryRun);
 
-  /// \brief Adds a multiline string literal to the \p State.
+  /// \brief Adds a multiline token to the \p State.
   ///
   /// \returns Extra penalty for the first line of the literal: last line is
   /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
   /// matter, as we don't change them.
-  unsigned addMultilineStringLiteral(const FormatToken &Current,
-                                     LineState &State);
+  unsigned addMultilineToken(const FormatToken &Current, LineState &State);
 
   /// \brief Returns \c true if the next token starts a multiline string
   /// literal.
Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -656,8 +656,6 @@
     // In case the token starts with escaped newlines, we want to
     // take them into account as whitespace - this pattern is quite frequent
     // in macro definitions.
-    // FIXME: What do we want to do with other escaped spaces, and escaped
-    // spaces or newlines in the middle of tokens?
     // FIXME: Add a more explicit test.
     while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\' &&
            FormatTok->TokenText[1] == '\n') {
@@ -692,26 +690,26 @@
 
     StringRef Text = FormatTok->TokenText;
     size_t FirstNewlinePos = Text.find('\n');
-    if (FirstNewlinePos != StringRef::npos) {
+    if (FirstNewlinePos == StringRef::npos) {
+      // FIXME: ColumnWidth actually depends on the start column, we need to
+      // take this into account when the token is moved.
+      FormatTok->ColumnWidth =
+          encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
+      Column += FormatTok->ColumnWidth;
+    } else {
       FormatTok->IsMultiline = true;
+      // FIXME: ColumnWidth actually depends on the start column, we need to
+      // take this into account when the token is moved.
+      FormatTok->ColumnWidth = encoding::columnWidthWithTabs(
+          Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding);
+
       // The last line of the token always starts in column 0.
       // Thus, the length can be precomputed even in the presence of tabs.
-      FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
+      Column = FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs(
           Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth,
           Encoding);
-      Text = Text.substr(0, FirstNewlinePos);
     }
 
-    // FIXME: ColumnWidth actually depends on the start column, we need to
-    // take this into account when the token is moved.
-    FormatTok->ColumnWidth =
-        encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding);
-
-    // FIXME: For multi-line tokens this should be LastLineColumnWidth.
-    // For line comments this should probably be zero. But before changing,
-    // we need good tests for this.
-    Column += FormatTok->ColumnWidth;
-
     return FormatTok;
   }
 
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -5735,9 +5735,6 @@
                "};",
                Tab);
 
-  // FIXME: To correctly count mixed whitespace we need to
-  // also correctly count mixed whitespace in front of the comment.
-
   Tab.TabWidth = 8;
   Tab.IndentWidth = 8;
   EXPECT_EQ("/*\n"
@@ -5794,6 +5791,39 @@
                    "}"));
 }
 
+TEST_F(FormatTest, CalculatesOriginalColumn) {
+  EXPECT_EQ("\"qqqqqqqqqqqqqqqqqqqqqqqqqq\\\n"
+            "q\"; /* some\n"
+            "       comment */",
+            format("  \"qqqqqqqqqqqqqqqqqqqqqqqqqq\\\n"
+                   "q\"; /* some\n"
+                   "       comment */",
+                   getLLVMStyle()));
+  EXPECT_EQ("// qqqqqqqqqqqqqqqqqqqqqqqqqq\n"
+            "/* some\n"
+            "   comment */",
+            format("// qqqqqqqqqqqqqqqqqqqqqqqqqq\n"
+                   " /* some\n"
+                   "    comment */",
+                   getLLVMStyle()));
+  EXPECT_EQ("// qqqqqqqqqqqqqqqqqqqqqqqqqq\\\n"
+            "qqq\n"
+            "/* some\n"
+            "   comment */",
+            format("// qqqqqqqqqqqqqqqqqqqqqqqqqq\\\n"
+                   "qqq\n"
+                   " /* some\n"
+                   "    comment */",
+                   getLLVMStyle()));
+  EXPECT_EQ("inttt qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq\\\n"
+            "wwww; /* some\n"
+            "         comment */",
+            format("  inttt qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq\\\n"
+                   "wwww; /* some\n"
+                   "         comment */",
+                   getLLVMStyle()));
+}
+
 TEST_F(FormatTest, ConfigurableSpaceAfterControlStatementKeyword) {
   FormatStyle NoSpace = getLLVMStyle();
   NoSpace.SpaceAfterControlStatementKeyword = false;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D1638.1.patch
Type: text/x-patch
Size: 7912 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130910/91e2a0ee/attachment.bin>


More information about the cfe-commits mailing list