r318141 - Refactor ContinuationIndenter's breakProtrudingToken logic.

Manuel Klimek via cfe-commits cfe-commits at lists.llvm.org
Tue Nov 14 01:19:53 PST 2017


Author: klimek
Date: Tue Nov 14 01:19:53 2017
New Revision: 318141

URL: http://llvm.org/viewvc/llvm-project?rev=318141&view=rev
Log:
Refactor ContinuationIndenter's breakProtrudingToken logic.

Create more orthogonal pieces. The restructuring made it easy to try out
several alternatives to D33589, and while none of the alternatives
turned out to be the right solution, the underlying simplification of
the structure is helpful.

Differential Revision: https://reviews.llvm.org/D39900

Modified:
    cfe/trunk/lib/Format/BreakableToken.cpp
    cfe/trunk/lib/Format/ContinuationIndenter.cpp
    cfe/trunk/lib/Format/ContinuationIndenter.h
    cfe/trunk/unittests/Format/FormatTest.cpp
    cfe/trunk/unittests/Format/FormatTestComments.cpp

Modified: cfe/trunk/lib/Format/BreakableToken.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=318141&r1=318140&r2=318141&view=diff
==============================================================================
--- cfe/trunk/lib/Format/BreakableToken.cpp (original)
+++ cfe/trunk/lib/Format/BreakableToken.cpp Tue Nov 14 01:19:53 2017
@@ -749,6 +749,7 @@ BreakableLineCommentSection::BreakableLi
     Prefix.resize(Lines.size());
     OriginalPrefix.resize(Lines.size());
     for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
+      Lines[i] = Lines[i].ltrim(Blanks);
       // We need to trim the blanks in case this is not the first line in a
       // multiline comment. Then the indent is included in Lines[i].
       StringRef IndentPrefix =

Modified: cfe/trunk/lib/Format/ContinuationIndenter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/ContinuationIndenter.cpp?rev=318141&r1=318140&r2=318141&view=diff
==============================================================================
--- cfe/trunk/lib/Format/ContinuationIndenter.cpp (original)
+++ cfe/trunk/lib/Format/ContinuationIndenter.cpp Tue Nov 14 01:19:53 2017
@@ -1008,8 +1008,8 @@ unsigned ContinuationIndenter::moveState
 
   moveStatePastFakeLParens(State, Newline);
   moveStatePastScopeCloser(State);
-  bool CanBreakProtrudingToken = !State.Stack.back().NoLineBreak &&
-                                 !State.Stack.back().NoLineBreakInOperand;
+  bool AllowBreak = !State.Stack.back().NoLineBreak &&
+                    !State.Stack.back().NoLineBreakInOperand;
   moveStatePastScopeOpener(State, Newline);
   moveStatePastFakeRParens(State);
 
@@ -1023,13 +1023,9 @@ unsigned ContinuationIndenter::moveState
 
   State.Column += Current.ColumnWidth;
   State.NextToken = State.NextToken->Next;
-  unsigned Penalty = 0;
-  if (CanBreakProtrudingToken)
-    Penalty = breakProtrudingToken(Current, State, DryRun);
-  if (State.Column > getColumnLimit(State)) {
-    unsigned ExcessCharacters = State.Column - getColumnLimit(State);
-    Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
-  }
+
+  unsigned Penalty =
+      handleEndOfLine(Current, State, DryRun, AllowBreak);
 
   if (Current.Role)
     Current.Role->formatFromToken(State, this, DryRun);
@@ -1282,8 +1278,10 @@ static unsigned getLastLineEndColumn(Str
 }
 
 unsigned ContinuationIndenter::reformatRawStringLiteral(
-    const FormatToken &Current, unsigned StartColumn, LineState &State,
-    StringRef Delimiter, const FormatStyle &RawStringStyle, bool DryRun) {
+    const FormatToken &Current, LineState &State,
+    const FormatStyle &RawStringStyle, bool DryRun) {
+  unsigned StartColumn = State.Column - Current.ColumnWidth;
+  auto Delimiter = *getRawStringDelimiter(Current.TokenText);
   // The text of a raw string is between the leading 'R"delimiter(' and the
   // trailing 'delimiter)"'.
   unsigned PrefixSize = 3 + Delimiter.size();
@@ -1353,9 +1351,6 @@ unsigned ContinuationIndenter::reformatR
 
 unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current,
                                                  LineState &State) {
-  if (!Current.IsMultiline)
-    return 0;
-
   // Break before further function parameters on all levels.
   for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
     State.Stack[i].BreakBeforeParameter = true;
@@ -1370,42 +1365,56 @@ unsigned ContinuationIndenter::addMultil
   return 0;
 }
 
-unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
-                                                    LineState &State,
-                                                    bool DryRun) {
+unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current,
+                                               LineState &State, bool DryRun,
+                                               bool AllowBreak) {
+  unsigned Penalty = 0;
   // Compute the raw string style to use in case this is a raw string literal
   // that can be reformatted.
-  llvm::Optional<StringRef> Delimiter = None;
-  llvm::Optional<FormatStyle> RawStringStyle = None;
-  if (Current.isStringLiteral())
-    Delimiter = getRawStringDelimiter(Current.TokenText);
-  if (Delimiter)
-    RawStringStyle = RawStringFormats.get(*Delimiter);
-
-  // Don't break multi-line tokens other than block comments and raw string
-  // literals. Instead, just update the state.
-  if (Current.isNot(TT_BlockComment) && !RawStringStyle && Current.IsMultiline)
-    return addMultilineToken(Current, State);
-
-  // Don't break implicit string literals or import statements.
-  if (Current.is(TT_ImplicitStringLiteral) ||
-      State.Line->Type == LT_ImportStatement)
-    return 0;
+  auto RawStringStyle = getRawStringStyle(Current, State);
+  if (RawStringStyle) {
+    Penalty = reformatRawStringLiteral(Current, State, *RawStringStyle, DryRun);
+  } else if (Current.IsMultiline && Current.isNot(TT_BlockComment)) {
+    // Don't break multi-line tokens other than block comments and raw string
+    // literals. Instead, just update the state.
+    Penalty = addMultilineToken(Current, State);
+  } else if (State.Line->Type != LT_ImportStatement) {
+    // We generally don't break import statements.
+    Penalty = breakProtrudingToken(Current, State, AllowBreak, DryRun);
+  }
+  if (State.Column > getColumnLimit(State)) {
+    unsigned ExcessCharacters = State.Column - getColumnLimit(State);
+    Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
+  }
+  return Penalty;
+}
 
-  if (!Current.isStringLiteral() && !Current.is(tok::comment))
-    return 0;
+llvm::Optional<FormatStyle>
+ContinuationIndenter::getRawStringStyle(const FormatToken &Current,
+                                        const LineState &State) {
+  if (!Current.isStringLiteral())
+    return None;
+  auto Delimiter = getRawStringDelimiter(Current.TokenText);
+  if (!Delimiter)
+    return None;
+  auto RawStringStyle = RawStringFormats.get(*Delimiter);
+  if (!RawStringStyle)
+    return None;
+  RawStringStyle->ColumnLimit = getColumnLimit(State);
+  return RawStringStyle;
+}
 
-  std::unique_ptr<BreakableToken> Token;
+std::unique_ptr<BreakableToken> ContinuationIndenter::createBreakableToken(
+    const FormatToken &Current, LineState &State, bool AllowBreak) {
   unsigned StartColumn = State.Column - Current.ColumnWidth;
-  unsigned ColumnLimit = getColumnLimit(State);
-
   if (Current.isStringLiteral()) {
     // FIXME: String literal breaking is currently disabled for Java and JS, as
     // it requires strings to be merged using "+" which we don't support.
     if (Style.Language == FormatStyle::LK_Java ||
         Style.Language == FormatStyle::LK_JavaScript ||
-        !Style.BreakStringLiterals)
-      return 0;
+        !Style.BreakStringLiterals ||
+        !AllowBreak)
+      return nullptr;
 
     // Don't break string literals inside preprocessor directives (except for
     // #define directives, as their contents are stored in separate lines and
@@ -1413,17 +1422,12 @@ unsigned ContinuationIndenter::breakProt
     // This way we avoid breaking code with line directives and unknown
     // preprocessor directives that contain long string literals.
     if (State.Line->Type == LT_PreprocessorDirective)
-      return 0;
+      return nullptr;
     // Exempts unterminated string literals from line breaking. The user will
     // likely want to terminate the string before any line breaking is done.
     if (Current.IsUnterminatedLiteral)
-      return 0;
+      return nullptr;
 
-    if (RawStringStyle) {
-      RawStringStyle->ColumnLimit = ColumnLimit;
-      return reformatRawStringLiteral(Current, StartColumn, State, *Delimiter,
-                                      *RawStringStyle, DryRun);
-    } 
     StringRef Text = Current.TokenText;
     StringRef Prefix;
     StringRef Postfix;
@@ -1437,36 +1441,48 @@ unsigned ContinuationIndenter::breakProt
           Text.startswith(Prefix = "u8\"") ||
           Text.startswith(Prefix = "L\""))) ||
         (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) {
-      Token.reset(new BreakableStringLiteral(Current, StartColumn, Prefix,
-                                             Postfix, State.Line->InPPDirective,
-                                             Encoding, Style));
-    } else {
-      return 0;
+      return llvm::make_unique<BreakableStringLiteral>(
+          Current, StartColumn, Prefix, Postfix, State.Line->InPPDirective,
+          Encoding, Style);
     }
   } else if (Current.is(TT_BlockComment)) {
     if (!Style.ReflowComments ||
         // If a comment token switches formatting, like
         // /* clang-format on */, we don't want to break it further,
         // but we may still want to adjust its indentation.
-        switchesFormatting(Current))
-      return addMultilineToken(Current, State);
-    Token.reset(new BreakableBlockComment(
+        switchesFormatting(Current)) {
+      return nullptr;
+    }
+    return llvm::make_unique<BreakableBlockComment>(
         Current, StartColumn, Current.OriginalColumn, !Current.Previous,
-        State.Line->InPPDirective, Encoding, Style));
+        State.Line->InPPDirective, Encoding, Style);
   } else if (Current.is(TT_LineComment) &&
              (Current.Previous == nullptr ||
               Current.Previous->isNot(TT_ImplicitStringLiteral))) {
     if (!Style.ReflowComments ||
         CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
         switchesFormatting(Current))
-      return 0;
-    Token.reset(new BreakableLineCommentSection(
+      return nullptr;
+    return llvm::make_unique<BreakableLineCommentSection>(
         Current, StartColumn, Current.OriginalColumn, !Current.Previous,
-        /*InPPDirective=*/false, Encoding, Style));
+        /*InPPDirective=*/false, Encoding, Style);
+  }
+  return nullptr;
+}
+
+unsigned ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
+                                                    LineState &State,
+                                                    bool AllowBreak,
+                                                    bool DryRun) {
+  std::unique_ptr<BreakableToken> Token =
+      createBreakableToken(Current, State, AllowBreak);
+  if (!Token)
+    return 0;
+  unsigned ColumnLimit = getColumnLimit(State);
+  unsigned StartColumn = State.Column - Current.ColumnWidth;
+  if (Current.is(TT_LineComment)) {
     // We don't insert backslashes when breaking line comments.
     ColumnLimit = Style.ColumnLimit;
-  } else {
-    return 0;
   }
   if (Current.UnbreakableTailLength >= ColumnLimit)
     return 0;

Modified: cfe/trunk/lib/Format/ContinuationIndenter.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/ContinuationIndenter.h?rev=318141&r1=318140&r2=318141&view=diff
==============================================================================
--- cfe/trunk/lib/Format/ContinuationIndenter.h (original)
+++ cfe/trunk/lib/Format/ContinuationIndenter.h Tue Nov 14 01:19:53 2017
@@ -29,6 +29,7 @@ class SourceManager;
 namespace format {
 
 class AnnotatedLine;
+class BreakableToken;
 struct FormatToken;
 struct LineState;
 struct ParenState;
@@ -105,11 +106,20 @@ private:
   /// 
   /// \returns An extra penalty induced by reformatting the token.
   unsigned reformatRawStringLiteral(const FormatToken &Current,
-                                    unsigned StartColumn, LineState &State,
-                                    StringRef Delimiter,
+                                    LineState &State,
                                     const FormatStyle &RawStringStyle,
                                     bool DryRun);
 
+  /// \brief If the current token is at the end of the current line, handle
+  /// the transition to the next line.
+  unsigned handleEndOfLine(const FormatToken &Current, LineState &State,
+                           bool DryRun, bool AllowBreak);
+
+  /// \brief If \p Current is a raw string that is configured to be reformatted,
+  /// return the style to be used.
+  llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current,
+                                                const LineState &State);
+
   /// \brief If the current token sticks out over the end of the line, break
   /// it if possible.
   ///
@@ -120,7 +130,13 @@ private:
   /// penalty for the column limit violation in the last line (and in single
   /// line tokens) is handled in \c addNextStateToQueue.
   unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
-                                bool DryRun);
+                                bool AllowBreak, bool DryRun);
+
+  /// \brief Returns the \c BreakableToken starting at \p Current, or nullptr
+  /// if the current token cannot be broken.
+  std::unique_ptr<BreakableToken>
+  createBreakableToken(const FormatToken &Current, LineState &State,
+                       bool AllowBreak);
 
   /// \brief Appends the next token to \p State and updates information
   /// necessary for indentation.

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=318141&r1=318140&r2=318141&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Tue Nov 14 01:19:53 2017
@@ -6319,7 +6319,8 @@ TEST_F(FormatTest, HandlesIncludeDirecti
                "#include_next <test.h>"
                "#include \"abc.h\" // this is included for ABC\n"
                "#include \"some long include\" // with a comment\n"
-               "#include \"some very long include paaaaaaaaaaaaaaaaaaaaaaath\"",
+               "#include \"some very long include path\"\n"
+               "#include <some/very/long/include/path>\n",
                getLLVMStyleWithColumns(35));
   EXPECT_EQ("#include \"a.h\"", format("#include  \"a.h\""));
   EXPECT_EQ("#include <a>", format("#include<a>"));

Modified: cfe/trunk/unittests/Format/FormatTestComments.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTestComments.cpp?rev=318141&r1=318140&r2=318141&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTestComments.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTestComments.cpp Tue Nov 14 01:19:53 2017
@@ -686,8 +686,7 @@ TEST_F(FormatTestComments, SplitsLongCxx
   EXPECT_EQ("{\n"
             "  //\n"
             "  //\\\n"
-            "  // long 1 2 3 4\n"
-            "  // 5\n"
+            "  // long 1 2 3 4 5\n"
             "}",
             format("{\n"
                    "  //\n"
@@ -695,6 +694,18 @@ TEST_F(FormatTestComments, SplitsLongCxx
                    "  // long 1 2 3 4 5\n"
                    "}",
                    getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("{\n"
+            "  //\n"
+            "  //\\\n"
+            "  // long 1 2 3 4 5\n"
+            "  // 6\n"
+            "}",
+            format("{\n"
+                   "  //\n"
+                   "  //\\\n"
+                   "  // long 1 2 3 4 5 6\n"
+                   "}",
+                   getLLVMStyleWithColumns(20)));
 }
 
 TEST_F(FormatTestComments, PreservesHangingIndentInCxxComments) {




More information about the cfe-commits mailing list