[PATCH] Implement string literal breaking on unbreakable token sequences.

Manuel Klimek klimek at google.com
Mon May 13 13:13:32 PDT 2013


Hi djasper, alexfh,

This fixes indentation where there are for example multiple closing
parentheses after a string literal, and where those parentheses
run over the end of the line.

During testing this revealed a bug in the implementation of
breakProtrudingToken: we don't want to change the state if we didn't
actually do anything.

http://llvm-reviews.chandlerc.com/D789

Files:
  lib/Format/Format.cpp
  unittests/Format/FormatTest.cpp

Index: lib/Format/Format.cpp
===================================================================
--- lib/Format/Format.cpp
+++ lib/Format/Format.cpp
@@ -752,15 +752,18 @@
     else
       State.NextToken = &State.NextToken->Children[0];
 
-    return breakProtrudingToken(Current, State, DryRun);
+    return breakProtrudingToken(Current, State, DryRun,
+                                /*TrailingUnbreakableLength=*/ 0);
   }
 
   /// \brief If the current token sticks out over the end of the line, break
   /// it if possible.
   unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
-                                bool DryRun) {
+                                bool DryRun,
+                                unsigned TrailingUnbreakableLength) {
     llvm::OwningPtr<BreakableToken> Token;
-    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
+    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength -
+                           TrailingUnbreakableLength;
     if (Current.is(tok::string_literal)) {
       // Only break up default narrow strings.
       const char *LiteralData = SourceMgr.getCharacterData(
@@ -781,42 +784,56 @@
                 Current.Parent->Type != TT_ImplicitStringLiteral)) {
       Token.reset(new BreakableLineComment(SourceMgr, Current, StartColumn));
     } else {
-      return 0;
+      // If a token that we cannot breaks protrudes, it means we were unable to
+      // break a sequence of tokens due to illegal breaks between the tokens.
+      // Thus, we recursively search backwards to try to find a breakable token.
+      if (State.Column <= getColumnLimit() ||
+          Current.CanBreakBefore || !Current.Parent)
+        return 0;
+      return breakProtrudingToken(
+          *Current.Parent, State, DryRun,
+          TrailingUnbreakableLength + Current.FormatTok.TokenLength);
     }
+    if (TrailingUnbreakableLength >= getColumnLimit())
+      return 0;
+    unsigned RemainingSpace = getColumnLimit() - TrailingUnbreakableLength;
 
     bool BreakInserted = false;
     unsigned Penalty = 0;
+    unsigned PositionAfterLastLineInToken = 0;
     for (unsigned LineIndex = 0; LineIndex < Token->getLineCount();
          ++LineIndex) {
       unsigned TailOffset = 0;
-      unsigned RemainingLength =
+      unsigned RemainingTokenLength =
           Token->getLineLengthAfterSplit(LineIndex, TailOffset);
-      while (RemainingLength > getColumnLimit()) {
+      while (RemainingTokenLength > RemainingSpace) {
         BreakableToken::Split Split =
-            Token->getSplit(LineIndex, TailOffset, getColumnLimit());
+            Token->getSplit(LineIndex, TailOffset, RemainingSpace);
         if (Split.first == StringRef::npos)
           break;
         assert(Split.first != 0);
-        unsigned NewRemainingLength = Token->getLineLengthAfterSplit(
+        unsigned NewRemainingTokenLength = Token->getLineLengthAfterSplit(
             LineIndex, TailOffset + Split.first + Split.second);
-        if (NewRemainingLength >= RemainingLength)
+        if (NewRemainingTokenLength >= RemainingTokenLength)
           break;
         if (!DryRun) {
           Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
                              Whitespaces);
         }
         TailOffset += Split.first + Split.second;
-        RemainingLength = NewRemainingLength;
+        RemainingTokenLength = NewRemainingTokenLength;
         Penalty += Style.PenaltyExcessCharacter;
         BreakInserted = true;
       }
-      State.Column = RemainingLength;
+      PositionAfterLastLineInToken = RemainingTokenLength;
       if (!DryRun) {
         Token->trimLine(LineIndex, TailOffset, Line.InPPDirective, Whitespaces);
       }
     }
 
     if (BreakInserted) {
+      // FIXME: Do we want to add TrailingUnbreakableLength here?
+      State.Column = PositionAfterLastLineInToken;
       for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
         State.Stack[i].BreakBeforeParameter = true;
       State.Stack.back().LastSpace = StartColumn;
Index: unittests/Format/FormatTest.cpp
===================================================================
--- unittests/Format/FormatTest.cpp
+++ unittests/Format/FormatTest.cpp
@@ -3932,6 +3932,41 @@
       format("#define A \"some text other\";", AlignLeft));
 }
 
+TEST_F(FormatTest, BreakStringLiteralsBeforeUnbreakableTokenSequence) {
+  EXPECT_EQ("someFunction(\"aaaaaaaaaa\"\n"
+            "             \"aaaaaaaa\");",
+            format("someFunction(\"aaaaaaaaaaaaaaaaaa\");",
+                   getLLVMStyleWithColumns(25)));
+  EXPECT_EQ("someFunction1111111111(\n"
+            "    \"aaaaaaaaaaaaaaaaaa\");",
+            format("someFunction1111111111(\"aaaaaaaaaaaaaaaaaa\");",
+                   getLLVMStyleWithColumns(26)));
+  EXPECT_EQ("someFunction1111111111(\n"
+            "    \"aaaaaaaaaaaaaaaaa\"\n"
+            "    \"a\");",
+            format("someFunction1111111111(\"aaaaaaaaaaaaaaaaaa\");",
+                   getLLVMStyleWithColumns(25)));
+  EXPECT_EQ("someFunction1111111111(\n"
+            "    \"aaaaaaaaaaaaaaaaa\"\n"
+            "    \"a\");",
+            format("someFunction1111111111(\"aaaaaaaaaaaaaaaaaa\");",
+                   getLLVMStyleWithColumns(24)));
+  EXPECT_EQ("someFunction(\n"
+            "    \"aaaaaaaa \"\n"
+            "    \"aaaaaaaaa\");",
+            format("someFunction(\"aaaaaaaa aaaaaaaaa\");",
+                   getLLVMStyleWithColumns(25)));
+  EXPECT_EQ("someFunction(\"aaaaaaaaa \"\n"
+            "             \"aaaaaaaa\");",
+            format("someFunction(\"aaaaaaaaa aaaaaaaa\");",
+                   getLLVMStyleWithColumns(25)));
+  EXPECT_EQ("someFunction1111111111(\n"
+            "    \"aaaaa \"\n"
+            "    \"aaaaaaaaaaaa\");",
+            format("someFunction1111111111(\"aaaaa aaaaaaaaaaaa\");",
+                   getLLVMStyleWithColumns(25)));
+}
+
 TEST_F(FormatTest, DoNotBreakStringLiteralsInEscapeSequence) {
   EXPECT_EQ("\"\\a\"",
             format("\"\\a\"", getLLVMStyleWithColumns(3)));
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D789.1.patch
Type: text/x-patch
Size: 6113 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130513/d82a8b33/attachment.bin>


More information about the cfe-commits mailing list