[PATCH] Implement string literal breaking on unbreakable token sequences.

Manuel Klimek klimek at google.com
Tue May 14 01:46:15 PDT 2013


  - Clean up tests.
  - Apply review comments.

Hi alexfh, djasper,

http://llvm-reviews.chandlerc.com/D789

CHANGE SINCE LAST DIFF
  http://llvm-reviews.chandlerc.com/D789?vs=1947&id=1954#toc

BRANCH
  break-strings-off-by-one

ARCANIST PROJECT
  clang

Files:
  lib/Format/Format.cpp
  unittests/Format/FormatTest.cpp

Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -757,10 +757,18 @@
 
   /// \brief If the current token sticks out over the end of the line, break
   /// it if possible.
+  ///
+  /// \returns An extra penalty if a token was broken, otherwise 0.
+  ///
+  /// Note that the penalty of the token protruding the allowed line length is
+  /// already handled in \c addNextStateToQueue; the returned penalty will only
+  /// cover the cost of the additional line breaks.
   unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
-                                bool DryRun) {
+                                bool DryRun,
+                                unsigned UnbreakableTailLength = 0) {
     llvm::OwningPtr<BreakableToken> Token;
-    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
+    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength -
+                           UnbreakableTailLength;
     if (Current.is(tok::string_literal)) {
       // Only break up default narrow strings.
       const char *LiteralData = SourceMgr.getCharacterData(
@@ -781,42 +789,55 @@
                 Current.Parent->Type != TT_ImplicitStringLiteral)) {
       Token.reset(new BreakableLineComment(SourceMgr, Current, StartColumn));
     } else {
-      return 0;
+      // If a token that we cannot breaks protrudes, it means we were unable to
+      // break a sequence of tokens due to disallowed breaks between the tokens.
+      // Thus, we recursively search backwards to try to find a breakable token.
+      if (State.Column <= getColumnLimit() ||
+          Current.CanBreakBefore || !Current.Parent)
+        return 0;
+      return breakProtrudingToken(
+          *Current.Parent, State, DryRun,
+          UnbreakableTailLength + Current.FormatTok.TokenLength);
     }
+    if (UnbreakableTailLength >= getColumnLimit())
+      return 0;
+    unsigned RemainingSpace = getColumnLimit() - UnbreakableTailLength;
 
     bool BreakInserted = false;
     unsigned Penalty = 0;
+    unsigned PositionAfterLastLineInToken = 0;
     for (unsigned LineIndex = 0; LineIndex < Token->getLineCount();
          ++LineIndex) {
       unsigned TailOffset = 0;
-      unsigned RemainingLength =
+      unsigned RemainingTokenLength =
           Token->getLineLengthAfterSplit(LineIndex, TailOffset);
-      while (RemainingLength > getColumnLimit()) {
+      while (RemainingTokenLength > RemainingSpace) {
         BreakableToken::Split Split =
-            Token->getSplit(LineIndex, TailOffset, getColumnLimit());
+            Token->getSplit(LineIndex, TailOffset, RemainingSpace);
         if (Split.first == StringRef::npos)
           break;
         assert(Split.first != 0);
-        unsigned NewRemainingLength = Token->getLineLengthAfterSplit(
+        unsigned NewRemainingTokenLength = Token->getLineLengthAfterSplit(
             LineIndex, TailOffset + Split.first + Split.second);
-        if (NewRemainingLength >= RemainingLength)
+        if (NewRemainingTokenLength >= RemainingTokenLength)
           break;
         if (!DryRun) {
           Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
                              Whitespaces);
         }
         TailOffset += Split.first + Split.second;
-        RemainingLength = NewRemainingLength;
+        RemainingTokenLength = NewRemainingTokenLength;
         Penalty += Style.PenaltyExcessCharacter;
         BreakInserted = true;
       }
-      State.Column = RemainingLength;
+      PositionAfterLastLineInToken = RemainingTokenLength;
       if (!DryRun) {
         Token->trimLine(LineIndex, TailOffset, Line.InPPDirective, Whitespaces);
       }
     }
 
     if (BreakInserted) {
+      State.Column = PositionAfterLastLineInToken + UnbreakableTailLength;
       for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
         State.Stack[i].BreakBeforeParameter = true;
       State.Stack.back().LastSpace = StartColumn;
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -3932,6 +3932,48 @@
       format("#define A \"some text other\";", AlignLeft));
 }
 
+TEST_F(FormatTest, BreakStringLiteralsBeforeUnbreakableTokenSequence) {
+  EXPECT_EQ("someFunction(\"aaabbbcccd\"\n"
+            "             \"ddeeefff\");",
+            format("someFunction(\"aaabbbcccdddeeefff\");",
+                   getLLVMStyleWithColumns(25)));
+  EXPECT_EQ("someFunction1234567890(\n"
+            "    \"aaabbbcccdddeeefff\");",
+            format("someFunction1234567890(\"aaabbbcccdddeeefff\");",
+                   getLLVMStyleWithColumns(26)));
+  EXPECT_EQ("someFunction1234567890(\n"
+            "    \"aaabbbcccdddeeeff\"\n"
+            "    \"f\");",
+            format("someFunction1234567890(\"aaabbbcccdddeeefff\");",
+                   getLLVMStyleWithColumns(25)));
+  EXPECT_EQ("someFunction1234567890(\n"
+            "    \"aaabbbcccdddeeeff\"\n"
+            "    \"f\");",
+            format("someFunction1234567890(\"aaabbbcccdddeeefff\");",
+                   getLLVMStyleWithColumns(24)));
+  EXPECT_EQ("someFunction(\n"
+            "    \"aaabbbcc \"\n"
+            "    \"dddeeefff\");",
+            format("someFunction(\"aaabbbcc dddeeefff\");",
+                   getLLVMStyleWithColumns(25)));
+  EXPECT_EQ("someFunction(\"aaabbbccc \"\n"
+            "             \"ddeeefff\");",
+            format("someFunction(\"aaabbbccc ddeeefff\");",
+                   getLLVMStyleWithColumns(25)));
+  EXPECT_EQ("someFunction1234567890(\n"
+            "    \"aaabb \"\n"
+            "    \"cccdddeeefff\");",
+            format("someFunction1234567890(\"aaabb cccdddeeefff\");",
+                   getLLVMStyleWithColumns(25)));
+  EXPECT_EQ("#define A          \\\n"
+            "  string s =       \\\n"
+            "      \"123456789\"  \\\n"
+            "      \"0\";         \\\n"
+            "  int i;",
+            format("#define A string s = \"1234567890\"; int i;",
+                   getLLVMStyleWithColumns(20)));
+}
+
 TEST_F(FormatTest, DoNotBreakStringLiteralsInEscapeSequence) {
   EXPECT_EQ("\"\\a\"",
             format("\"\\a\"", getLLVMStyleWithColumns(3)));
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D789.2.patch
Type: text/x-patch
Size: 6401 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130514/b7f4799d/attachment.bin>


More information about the cfe-commits mailing list