r319541 - Better trade-off for excess characters vs. staying within the column limits.

Manuel Klimek via cfe-commits cfe-commits at lists.llvm.org
Fri Dec 1 05:28:08 PST 2017


Author: klimek
Date: Fri Dec  1 05:28:08 2017
New Revision: 319541

URL: http://llvm.org/viewvc/llvm-project?rev=319541&view=rev
Log:
Better trade-off for excess characters vs. staying within the column limits.

When we break a long line like:
Column limit: 21
                      |
  // foo foo foo foo foo foo foo foo foo foo foo foo

The local decision when to allow protruding vs. breaking can lead to this
outcome (2 excess characters, 2 breaks):
  // foo foo foo foo foo
  // foo foo foo foo foo
  // foo foo

While strictly staying within the column limit leads to this strictly better
outcome (fully below the column limit, 2 breaks):
  // foo foo foo foo
  // foo foo foo foo
  // foo foo foo foo

To get an optimal solution, we would need to consider all combinations of excess
characters vs. breaking for all lines, but that would lead to a significant
increase in the search space of the algorithm for little gain.

Instead, we blindly try both approches and·select the one that leads to the
overall lower penalty.

Differential Revision: https://reviews.llvm.org/D40605

Modified:
    cfe/trunk/lib/Format/ContinuationIndenter.cpp
    cfe/trunk/lib/Format/ContinuationIndenter.h
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/ContinuationIndenter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/ContinuationIndenter.cpp?rev=319541&r1=319540&r2=319541&view=diff
==============================================================================
--- cfe/trunk/lib/Format/ContinuationIndenter.cpp (original)
+++ cfe/trunk/lib/Format/ContinuationIndenter.cpp Fri Dec  1 05:28:08 2017
@@ -1390,7 +1390,36 @@ unsigned ContinuationIndenter::handleEnd
     Penalty = addMultilineToken(Current, State);
   } else if (State.Line->Type != LT_ImportStatement) {
     // We generally don't break import statements.
-    Penalty = breakProtrudingToken(Current, State, AllowBreak, DryRun);
+    LineState OriginalState = State;
+
+    // Whether we force the reflowing algorithm to stay strictly within the
+    // column limit.
+    bool Strict = false;
+    // Whether the first non-strict attempt at reflowing did intentionally
+    // exceed the column limit.
+    bool Exceeded = false;
+    std::tie(Penalty, Exceeded) = breakProtrudingToken(
+        Current, State, AllowBreak, /*DryRun=*/true, Strict);
+    if (Exceeded) {
+      // If non-strict reflowing exceeds the column limit, try whether strict
+      // reflowing leads to an overall lower penalty.
+      LineState StrictState = OriginalState;
+      unsigned StrictPenalty =
+          breakProtrudingToken(Current, StrictState, AllowBreak,
+                               /*DryRun=*/true, /*Strict=*/true)
+              .first;
+      Strict = StrictPenalty <= Penalty;
+      if (Strict) {
+        Penalty = StrictPenalty;
+        State = StrictState;
+      }
+    }
+    if (!DryRun) {
+      // If we're not in dry-run mode, apply the changes with the decision on
+      // strictness made above.
+      breakProtrudingToken(Current, OriginalState, AllowBreak, /*DryRun=*/false,
+                           Strict);
+    }
   }
   if (State.Column > getColumnLimit(State)) {
     unsigned ExcessCharacters = State.Column - getColumnLimit(State);
@@ -1480,14 +1509,14 @@ std::unique_ptr<BreakableToken> Continua
   return nullptr;
 }
 
-unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
-                                                    LineState &State,
-                                                    bool AllowBreak,
-                                                    bool DryRun) {
+std::pair<unsigned, bool>
+ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
+                                           LineState &State, bool AllowBreak,
+                                           bool DryRun, bool Strict) {
   std::unique_ptr<const BreakableToken> Token =
       createBreakableToken(Current, State, AllowBreak);
   if (!Token)
-    return 0;
+    return {0, false};
   assert(Token->getLineCount() > 0);
   unsigned ColumnLimit = getColumnLimit(State);
   if (Current.is(TT_LineComment)) {
@@ -1495,13 +1524,16 @@ unsigned ContinuationIndenter::breakProt
     ColumnLimit = Style.ColumnLimit;
   }
   if (Current.UnbreakableTailLength >= ColumnLimit)
-    return 0;
+    return {0, false};
   // ColumnWidth was already accounted into State.Column before calling
   // breakProtrudingToken.
   unsigned StartColumn = State.Column - Current.ColumnWidth;
   unsigned NewBreakPenalty = Current.isStringLiteral()
                                  ? Style.PenaltyBreakString
                                  : Style.PenaltyBreakComment;
+  // Stores whether we intentionally decide to let a line exceed the column
+  // limit.
+  bool Exceeded = false;
   // Stores whether we introduce a break anywhere in the token.
   bool BreakInserted = Token->introducesBreakBeforeToken();
   // Store whether we inserted a new line break at the end of the previous
@@ -1612,7 +1644,7 @@ unsigned ContinuationIndenter::breakProt
         bool ContinueOnLine =
             ContentStartColumn + ToNextSplitColumns <= ColumnLimit;
         unsigned ExcessCharactersPenalty = 0;
-        if (!ContinueOnLine) {
+        if (!ContinueOnLine && !Strict) {
           // Similarly, if the excess characters' penalty is lower than the
           // penalty of introducing a new break, continue on the current line.
           ExcessCharactersPenalty =
@@ -1621,8 +1653,10 @@ unsigned ContinuationIndenter::breakProt
           DEBUG(llvm::dbgs()
                 << "    Penalty excess: " << ExcessCharactersPenalty
                 << "\n            break : " << NewBreakPenalty << "\n");
-          if (ExcessCharactersPenalty < NewBreakPenalty)
+          if (ExcessCharactersPenalty < NewBreakPenalty) {
+            Exceeded = true;
             ContinueOnLine = true;
+          }
         }
         if (ContinueOnLine) {
           DEBUG(llvm::dbgs() << "    Continuing on line...\n");
@@ -1817,7 +1851,7 @@ unsigned ContinuationIndenter::breakProt
 
   Token->updateNextToken(State);
 
-  return Penalty;
+  return {Penalty, Exceeded};
 }
 
 unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const {

Modified: cfe/trunk/lib/Format/ContinuationIndenter.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/ContinuationIndenter.h?rev=319541&r1=319540&r2=319541&view=diff
==============================================================================
--- cfe/trunk/lib/Format/ContinuationIndenter.h (original)
+++ cfe/trunk/lib/Format/ContinuationIndenter.h Fri Dec  1 05:28:08 2017
@@ -123,14 +123,25 @@ private:
   /// \brief If the current token sticks out over the end of the line, break
   /// it if possible.
   ///
-  /// \returns An extra penalty if a token was broken, otherwise 0.
+  /// \returns A pair (penalty, exceeded), where penalty is the extra penalty
+  /// when tokens are broken or lines exceed the column limit, and exceeded
+  /// indicates whether the algorithm purposefully left lines exceeding the
+  /// column limit.
   ///
   /// The returned penalty will cover the cost of the additional line breaks
   /// and column limit violation in all lines except for the last one. The
   /// penalty for the column limit violation in the last line (and in single
   /// line tokens) is handled in \c addNextStateToQueue.
-  unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
-                                bool AllowBreak, bool DryRun);
+  ///
+  /// \p Strict indicates whether reflowing is allowed to leave characters
+  /// protruding the column limit; if true, lines will be split strictly within
+  /// the column limit where possible; if false, words are allowed to protrude
+  /// over the column limit as long as the penalty is less than the penalty
+  /// of a break.
+  std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current,
+                                                 LineState &State,
+                                                 bool AllowBreak, bool DryRun,
+                                                 bool Strict);
 
   /// \brief Returns the \c BreakableToken starting at \p Current, or nullptr
   /// if the current token cannot be broken.

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=319541&r1=319540&r2=319541&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Fri Dec  1 05:28:08 2017
@@ -9934,8 +9934,8 @@ TEST_F(FormatTest, OptimizeBreakPenaltyV
   Style.PenaltyExcessCharacter = 90;
   verifyFormat("int a; // the comment", Style);
   EXPECT_EQ("int a; // the comment\n"
-            "       // aa",
-            format("int a; // the comment aa", Style));
+            "       // aaa",
+            format("int a; // the comment aaa", Style));
   EXPECT_EQ("int a; /* first line\n"
             "        * second line\n"
             "        * third line\n"
@@ -9963,14 +9963,14 @@ TEST_F(FormatTest, OptimizeBreakPenaltyV
                    Style));
 
   EXPECT_EQ("// foo bar baz bazfoo\n"
-            "// foo bar\n",
+            "// foo bar foo bar\n",
             format("// foo bar baz bazfoo\n"
-                   "// foo            bar\n",
+                   "// foo bar foo           bar\n",
                    Style));
   EXPECT_EQ("// foo bar baz bazfoo\n"
-            "// foo bar\n",
+            "// foo bar foo bar\n",
             format("// foo bar baz      bazfoo\n"
-                   "// foo            bar\n",
+                   "// foo            bar foo bar\n",
                    Style));
 
   // FIXME: Optimally, we'd keep bazfoo on the first line and reflow bar to the
@@ -9996,6 +9996,20 @@ TEST_F(FormatTest, OptimizeBreakPenaltyV
                    "// foo bar baz      bazfoo bar\n"
                    "// foo           bar\n",
                    Style));
+
+  // Make sure we do not keep protruding characters if strict mode reflow is
+  // cheaper than keeping protruding characters.
+  Style.ColumnLimit = 21;
+  EXPECT_EQ("// foo foo foo foo\n"
+            "// foo foo foo foo\n"
+            "// foo foo foo foo\n",
+            format("// foo foo foo foo foo foo foo foo foo foo foo foo\n",
+                           Style));
+
+  EXPECT_EQ("int a = /* long block\n"
+            "           comment */\n"
+            "    42;",
+            format("int a = /* long block comment */ 42;", Style));
 }
 
 #define EXPECT_ALL_STYLES_EQUAL(Styles)                                        \




More information about the cfe-commits mailing list