r175613 - Implements breaking of string literals if they stick out.

Manuel Klimek klimek at google.com
Wed Feb 20 02:15:13 PST 2013


Author: klimek
Date: Wed Feb 20 04:15:13 2013
New Revision: 175613

URL: http://llvm.org/viewvc/llvm-project?rev=175613&view=rev
Log:
Implements breaking of string literals if they stick out.

An alternative strategy to calculating the break on demand when hitting
a token that would need to be broken would be to put all possible breaks
inside the token into the optimizer.

Currently only supports breaking at spaces; more break points to come.

Modified:
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=175613&r1=175612&r2=175613&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Wed Feb 20 04:15:13 2013
@@ -135,8 +135,7 @@ public:
     // If this line does not have a trailing comment, align the stored comments.
     if (Tok.Children.empty() && !isTrailingComment(Tok))
       alignComments();
-    storeReplacement(Tok.FormatTok,
-                     std::string(NewLines, '\n') + std::string(Spaces, ' '));
+    storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces));
   }
 
   /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
@@ -147,6 +146,47 @@ public:
   void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
                            unsigned Spaces, unsigned WhitespaceStartColumn,
                            const FormatStyle &Style) {
+    storeReplacement(
+        Tok.FormatTok,
+        getNewLineText(NewLines, Spaces, WhitespaceStartColumn, Style));
+  }
+
+  /// \brief Inserts a line break into the middle of a token.
+  ///
+  /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line
+  /// break and \p Postfix before the rest of the token starts in the next line.
+  ///
+  /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are
+  /// used to generate the correct line break.
+  void breakToken(const AnnotatedToken &Tok, unsigned Offset, StringRef Prefix,
+                  StringRef Postfix, bool InPPDirective, unsigned Spaces,
+                  unsigned WhitespaceStartColumn, const FormatStyle &Style) {
+    std::string NewLineText;
+    if (!InPPDirective)
+      NewLineText = getNewLineText(1, Spaces);
+    else
+      NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn, Style);
+    std::string ReplacementText = (Prefix + NewLineText + Postfix).str();
+    SourceLocation InsertAt = Tok.FormatTok.WhiteSpaceStart
+        .getLocWithOffset(Tok.FormatTok.WhiteSpaceLength + Offset);
+    Replaces.insert(
+        tooling::Replacement(SourceMgr, InsertAt, 0, ReplacementText));
+  }
+
+  /// \brief Returns all the \c Replacements created during formatting.
+  const tooling::Replacements &generateReplacements() {
+    alignComments();
+    return Replaces;
+  }
+
+private:
+  std::string getNewLineText(unsigned NewLines, unsigned Spaces) {
+    return std::string(NewLines, '\n') + std::string(Spaces, ' ');
+  }
+
+  std::string
+  getNewLineText(unsigned NewLines, unsigned Spaces,
+                 unsigned WhitespaceStartColumn, const FormatStyle &Style) {
     std::string NewLineText;
     if (NewLines > 0) {
       unsigned Offset =
@@ -157,16 +197,9 @@ public:
         Offset = 0;
       }
     }
-    storeReplacement(Tok.FormatTok, NewLineText + std::string(Spaces, ' '));
-  }
-
-  /// \brief Returns all the \c Replacements created during formatting.
-  const tooling::Replacements &generateReplacements() {
-    alignComments();
-    return Replaces;
+    return NewLineText + std::string(Spaces, ' ');
   }
 
-private:
   /// \brief Structure to store a comment for later layout and alignment.
   struct StoredComment {
     FormatToken Tok;
@@ -258,7 +291,7 @@ public:
     });
 
     // The first token has already been indented and thus consumed.
-    moveStateToNextToken(State);
+    moveStateToNextToken(State, /*DryRun=*/ false);
 
     // If everything fits on a single line, just put it there.
     if (Line.Last->TotalLength <= getColumnLimit() - FirstIndent) {
@@ -419,7 +452,7 @@ private:
   ///
   /// If \p DryRun is \c false, also creates and stores the required
   /// \c Replacement.
-  void addTokenToState(bool Newline, bool DryRun, LineState &State) {
+  unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) {
     const AnnotatedToken &Current = *State.NextToken;
     const AnnotatedToken &Previous = *State.NextToken->Parent;
     assert(State.Stack.size());
@@ -431,7 +464,7 @@ private:
         State.NextToken = NULL;
       else
         State.NextToken = &State.NextToken->Children[0];
-      return;
+      return 0;
     }
 
     if (Newline) {
@@ -576,12 +609,12 @@ private:
       }
     }
 
-    moveStateToNextToken(State);
+    return moveStateToNextToken(State, DryRun);
   }
 
   /// \brief Mark the next token as consumed in \p State and modify its stacks
   /// accordingly.
-  void moveStateToNextToken(LineState &State) {
+  unsigned moveStateToNextToken(LineState &State, bool DryRun) {
     const AnnotatedToken &Current = *State.NextToken;
     assert(State.Stack.size());
 
@@ -649,12 +682,59 @@ private:
       State.Stack.pop_back();
     }
 
+    State.Column += Current.FormatTok.TokenLength;
+
     if (State.NextToken->Children.empty())
       State.NextToken = NULL;
     else
       State.NextToken = &State.NextToken->Children[0];
 
-    State.Column += Current.FormatTok.TokenLength;
+    return breakProtrudingToken(Current, State, DryRun);
+  }
+
+  /// \brief If the current token sticks out over the end of the line, break
+  /// it if possible.
+  unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
+                                bool DryRun) {
+    if (Current.isNot(tok::string_literal))
+      return 0;
+
+    unsigned Penalty = 0;
+    unsigned TailOffset = 0;
+    unsigned TailLength = Current.FormatTok.TokenLength;
+    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
+    unsigned OffsetFromStart = 0;
+    while (StartColumn + TailLength > getColumnLimit()) {
+      StringRef Text = StringRef(Current.FormatTok.Tok.getLiteralData() +
+                                 TailOffset, TailLength);
+      StringRef::size_type SplitPoint =
+          getSplitPoint(Text, getColumnLimit() - StartColumn - 1);
+      if (SplitPoint == StringRef::npos)
+        break;
+      assert(SplitPoint != 0);
+      // +2, because 'Text' starts after the opening quotes, and does not
+      // include the closing quote we need to insert.
+      unsigned WhitespaceStartColumn =
+          StartColumn + OffsetFromStart + SplitPoint + 2;
+      State.Stack.back().LastSpace = StartColumn;
+      if (!DryRun) {
+        Whitespaces.breakToken(Current, TailOffset + SplitPoint + 1, "\"", "\"",
+                               Line.InPPDirective, StartColumn,
+                               WhitespaceStartColumn, Style);
+      }
+      TailOffset += SplitPoint + 1;
+      TailLength -= SplitPoint + 1;
+      OffsetFromStart = 1;
+      Penalty += 100;
+    }
+    State.Column = StartColumn + TailLength;
+    return Penalty;
+  }
+
+  StringRef::size_type
+  getSplitPoint(StringRef Text, StringRef::size_type Offset) {
+    // FIXME: Implement more sophisticated splitting mechanism, and a fallback.
+    return Text.rfind(' ', Offset);
   }
 
   unsigned getColumnLimit() {
@@ -767,7 +847,7 @@ private:
 
     StateNode *Node = new (Allocator.Allocate())
         StateNode(PreviousNode->State, NewLine, PreviousNode);
-    addTokenToState(NewLine, true, Node->State);
+    Penalty += addTokenToState(NewLine, true, Node->State);
     if (Node->State.Column > getColumnLimit()) {
       unsigned ExcessCharacters = Node->State.Column - getColumnLimit();
       Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=175613&r1=175612&r2=175613&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Wed Feb 20 04:15:13 2013
@@ -2843,5 +2843,65 @@ TEST_F(FormatTest, ReformatRegionAdjusts
                    13, 0, getLLVMStyle()));
 }
 
+TEST_F(FormatTest, BreakStringLiterals) {
+  EXPECT_EQ("\"some text \"\n"
+            "\"other\";",
+            format("\"some text other\";", getLLVMStyleWithColumns(12)));
+  EXPECT_EQ(
+      "#define A  \\\n"
+      "  \"some \"  \\\n"
+      "  \"text \"  \\\n"
+      "  \"other\";",
+      format("#define A \"some text other\";", getLLVMStyleWithColumns(12)));
+  EXPECT_EQ(
+      "#define A  \\\n"
+      "  \"so \"    \\\n"
+      "  \"text \"  \\\n"
+      "  \"other\";",
+      format("#define A \"so text other\";", getLLVMStyleWithColumns(12)));
+
+  EXPECT_EQ("\"some text\"",
+            format("\"some text\"", getLLVMStyleWithColumns(1)));
+  EXPECT_EQ("\"some text\"",
+            format("\"some text\"", getLLVMStyleWithColumns(11)));
+  EXPECT_EQ("\"some \"\n"
+            "\"text\"",
+            format("\"some text\"", getLLVMStyleWithColumns(10)));
+  EXPECT_EQ("\"some \"\n"
+            "\"text\"",
+            format("\"some text\"", getLLVMStyleWithColumns(7)));
+  EXPECT_EQ("\"some text\"",
+            format("\"some text\"", getLLVMStyleWithColumns(6)));
+
+  EXPECT_EQ("variable =\n"
+            "    \"long string \"\n"
+            "    \"literal\";",
+            format("variable = \"long string literal\";",
+                   getLLVMStyleWithColumns(20)));
+
+  EXPECT_EQ("variable = f(\n"
+            "    \"long string \"\n"
+            "    \"literal\", short,\n"
+            "    loooooooooooooooooooong);",
+            format("variable = f(\"long string literal\", short, "
+                   "loooooooooooooooooooong);",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ(
+      "f(\"one two\".split(\n"
+      "    variable));",
+      format("f(\"one two\".split(variable));", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("f(\"one two three four five six \"\n"
+            "  \"seven\".split(\n"
+            "      really_looooong_variable));",
+            format("f(\"one two three four five six seven\"."
+                   "split(really_looooong_variable));",
+                   getLLVMStyleWithColumns(33)));
+
+  EXPECT_EQ("f(\"some \"\n"
+            "  \"text\",\n"
+            "  other);",
+            format("f(\"some text\", other);", getLLVMStyleWithColumns(10)));
+}
+
 } // end namespace tooling
 } // end namespace clang





More information about the cfe-commits mailing list