r319314 - Restructure how we break tokens.

Wed Nov 29 06:29:43 PST 2017

Author: klimek
Date: Wed Nov 29 06:29:43 2017
New Revision: 319314

URL: http://llvm.org/viewvc/llvm-project?rev=319314&view=rev
Log:
Restructure how we break tokens.

This fixes some bugs in the reflowing logic and splits out the concerns
of reflowing from BreakableToken.

Things to do after this patch:
- Refactor the breakProtrudingToken function possibly into a class, so we
  can split it up into methods that operate on the common state.
- Optimize whitespace compression when reflowing by using the next possible
  split point instead of the latest possible split point.
- Retry different strategies for reflowing (strictly staying below the
  column limit vs. allowing excess characters if possible).

Differential Revision: https://reviews.llvm.org/D40310

Modified:
    cfe/trunk/lib/Format/BreakableToken.cpp
    cfe/trunk/lib/Format/BreakableToken.h
    cfe/trunk/lib/Format/ContinuationIndenter.cpp
    cfe/trunk/unittests/Format/FormatTest.cpp
    cfe/trunk/unittests/Format/FormatTestComments.cpp

Modified: cfe/trunk/lib/Format/BreakableToken.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=319314&r1=319313&r2=319314&view=diff
==============================================================================

--- cfe/trunk/lib/Format/BreakableToken.cpp (original)
+++ cfe/trunk/lib/Format/BreakableToken.cpp Wed Nov 29 06:29:43 2017
@@ -67,6 +67,8 @@ static BreakableToken::Split getCommentS
                                              unsigned ColumnLimit,
                                              unsigned TabWidth,
                                              encoding::Encoding Encoding) {
+  DEBUG(llvm::dbgs() << "Comment split: \"" << Text << ", " << ColumnLimit
+                     << "\", Content start: " << ContentStartColumn << "\n");
   if (ColumnLimit <= ContentStartColumn + 1)
     return BreakableToken::Split(StringRef::npos, 0);
 
@@ -171,7 +173,7 @@ bool switchesFormatting(const FormatToke
 }
 
 unsigned
-BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns,
+BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
                                               Split Split) const {
   // Example: consider the content
   // lala  lala
@@ -181,50 +183,58 @@ BreakableToken::getLineLengthAfterCompre
   // We compute the number of columns when the split is compressed into a single
   // space, like:
   // lala lala
+  //
+  // FIXME: Correctly measure the length of whitespace in Split.second so it
+  // works with tabs.
   return RemainingTokenColumns + 1 - Split.second;
 }
 
-unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
+unsigned BreakableStringLiteral::getLineCount() const { return 1; }
 
-unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
-    unsigned LineIndex, unsigned TailOffset,
-    StringRef::size_type Length) const {
-  return StartColumn + Prefix.size() + Postfix.size() +
-         encoding::columnWidthWithTabs(Line.substr(TailOffset, Length),
-                                       StartColumn + Prefix.size(),
-                                       Style.TabWidth, Encoding);
+unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
+                                                unsigned Offset,
+                                                StringRef::size_type Length,
+                                                unsigned StartColumn) const {
+  assert(false &&
+         "Getting the length of a part of the string literal indicates that "
+         "the code tries to reflow it.");
 }
 
-BreakableSingleLineToken::BreakableSingleLineToken(
+unsigned
+BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
+                                           unsigned StartColumn) const {
+  return UnbreakableTailLength + Postfix.size() +
+         encoding::columnWidthWithTabs(Line.substr(Offset, StringRef::npos),
+                                       StartColumn, Style.TabWidth, Encoding);
+}
+
+unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
+                                                       bool Break) const {
+  return StartColumn + Prefix.size();
+}
+
+BreakableStringLiteral::BreakableStringLiteral(
     const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
     StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
     const FormatStyle &Style)
     : BreakableToken(Tok, InPPDirective, Encoding, Style),
-      StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) {
+      StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
+      UnbreakableTailLength(Tok.UnbreakableTailLength) {
   assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
   Line = Tok.TokenText.substr(
       Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
 }
 
-BreakableStringLiteral::BreakableStringLiteral(
-    const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
-    StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding,
-    const FormatStyle &Style)
-    : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective,
-                               Encoding, Style) {}
-
-BreakableToken::Split
-BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
-                                 unsigned ColumnLimit,
-                                 llvm::Regex &CommentPragmasRegex) const {
-  return getStringSplit(Line.substr(TailOffset),
-                        StartColumn + Prefix.size() + Postfix.size(),
-                        ColumnLimit, Style.TabWidth, Encoding);
+BreakableToken::Split BreakableStringLiteral::getSplit(
+    unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+    unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const {
+  return getStringSplit(Line.substr(TailOffset), ContentStartColumn,
+                        ColumnLimit - Postfix.size(), Style.TabWidth, Encoding);
 }
 
 void BreakableStringLiteral::insertBreak(unsigned LineIndex,
                                          unsigned TailOffset, Split Split,
-                                         WhitespaceManager &Whitespaces) {
+                                         WhitespaceManager &Whitespaces) const {
   Whitespaces.replaceWhitespaceInToken(
       Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
       Prefix, InPPDirective, 1, StartColumn);
@@ -241,19 +251,19 @@ unsigned BreakableComment::getLineCount(
 
 BreakableToken::Split
 BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
-                           unsigned ColumnLimit,
+                           unsigned ColumnLimit, unsigned ContentStartColumn,
                            llvm::Regex &CommentPragmasRegex) const {
   // Don't break lines matching the comment pragmas regex.
   if (CommentPragmasRegex.match(Content[LineIndex]))
     return Split(StringRef::npos, 0);
   return getCommentSplit(Content[LineIndex].substr(TailOffset),
-                         getContentStartColumn(LineIndex, TailOffset),
-                         ColumnLimit, Style.TabWidth, Encoding);
+                         ContentStartColumn, ColumnLimit, Style.TabWidth,
+                         Encoding);
 }
 
-void BreakableComment::compressWhitespace(unsigned LineIndex,
-                                          unsigned TailOffset, Split Split,
-                                          WhitespaceManager &Whitespaces) {
+void BreakableComment::compressWhitespace(
+    unsigned LineIndex, unsigned TailOffset, Split Split,
+    WhitespaceManager &Whitespaces) const {
   StringRef Text = Content[LineIndex].substr(TailOffset);
   // Text is relative to the content line, but Whitespaces operates relative to
   // the start of the corresponding token, so compute the start of the Split
@@ -267,44 +277,6 @@ void BreakableComment::compressWhitespac
       /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
 }
 
-BreakableToken::Split
-BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix,
-                                 unsigned PreviousEndColumn,
-                                 unsigned ColumnLimit) const {
-  unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size();
-  StringRef TrimmedText = Text.rtrim(Blanks);
-  // This is the width of the resulting line in case the full line of Text gets
-  // reflown up starting at ReflowStartColumn.
-  unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs(
-                                               TrimmedText, ReflowStartColumn,
-                                               Style.TabWidth, Encoding);
-  // If the full line fits up, we return a reflow split after it,
-  // otherwise we compute the largest piece of text that fits after
-  // ReflowStartColumn.
-  Split ReflowSplit =
-      FullWidth <= ColumnLimit
-          ? Split(TrimmedText.size(), Text.size() - TrimmedText.size())
-          : getCommentSplit(Text, ReflowStartColumn, ColumnLimit,
-                            Style.TabWidth, Encoding);
-
-  // We need to be extra careful here, because while it's OK to keep a long line
-  // if it can't be broken into smaller pieces (like when the first word of a
-  // long line is longer than the column limit), it's not OK to reflow that long
-  // word up. So we recompute the size of the previous line after reflowing and
-  // only return the reflow split if that's under the line limit.
-  if (ReflowSplit.first != StringRef::npos &&
-      // Check if the width of the newly reflown line is under the limit.
-      PreviousEndColumn + ReflowPrefix.size() +
-              encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first),
-                                            PreviousEndColumn +
-                                                ReflowPrefix.size(),
-                                            Style.TabWidth, Encoding) <=
-          ColumnLimit) {
-    return ReflowSplit;
-  }
-  return Split(StringRef::npos, 0);
-}
-
 const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
   return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
 }
@@ -501,30 +473,44 @@ void BreakableBlockComment::adjustWhites
       IndentDelta;
 }
 
-unsigned BreakableBlockComment::getLineLengthAfterSplit(
-    unsigned LineIndex, unsigned TailOffset,
-    StringRef::size_type Length) const {
-  unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
+unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
+                                               unsigned Offset,
+                                               StringRef::size_type Length,
+                                               unsigned StartColumn) const {
   unsigned LineLength =
-      ContentStartColumn + encoding::columnWidthWithTabs(
-                               Content[LineIndex].substr(TailOffset, Length),
-                               ContentStartColumn, Style.TabWidth, Encoding);
+      encoding::columnWidthWithTabs(Content[LineIndex].substr(Offset, Length),
+                                    StartColumn, Style.TabWidth, Encoding);
+  // FIXME: This should go into getRemainingLength instead, but we currently
+  // break tests when putting it there. Investigate how to fix those tests.
   // The last line gets a "*/" postfix.
   if (LineIndex + 1 == Lines.size()) {
     LineLength += 2;
     // We never need a decoration when breaking just the trailing "*/" postfix.
     // Note that checking that Length == 0 is not enough, since Length could
     // also be StringRef::npos.
-    if (Content[LineIndex].substr(TailOffset, Length).empty()) {
+    if (Content[LineIndex].substr(Offset, StringRef::npos).empty()) {
       LineLength -= Decoration.size();
     }
   }
   return LineLength;
 }
 
+unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex,
+                                                   unsigned Offset,
+                                                   unsigned StartColumn) const {
+  return getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
+}
+
+unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
+                                                      bool Break) const {
+  if (Break)
+    return IndentAtLineBreak;
+  return std::max(0, ContentColumn[LineIndex]);
+}
+
 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
                                         Split Split,
-                                        WhitespaceManager &Whitespaces) {
+                                        WhitespaceManager &Whitespaces) const {
   StringRef Text = Content[LineIndex].substr(TailOffset);
   StringRef Prefix = Decoration;
   // We need this to account for the case when we have a decoration "* " for all
@@ -550,59 +536,14 @@ void BreakableBlockComment::insertBreak(
       /*Spaces=*/LocalIndentAtLineBreak - Prefix.size());
 }
 
-BreakableToken::Split BreakableBlockComment::getSplitBefore(
-    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
-    llvm::Regex &CommentPragmasRegex) const {
+BreakableToken::Split
+BreakableBlockComment::getReflowSplit(unsigned LineIndex,
+                                      llvm::Regex &CommentPragmasRegex) const {
   if (!mayReflow(LineIndex, CommentPragmasRegex))
     return Split(StringRef::npos, 0);
-  StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
-  Split Result = getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn,
-                                ColumnLimit);
-  // Result is relative to TrimmedContent. Adapt it relative to
-  // Content[LineIndex].
-  if (Result.first != StringRef::npos)
-    Result.first += Content[LineIndex].size() - TrimmedContent.size();
-  return Result;
-}
 
-unsigned
-BreakableBlockComment::getReflownColumn(StringRef Content, unsigned LineIndex,
-                                        unsigned PreviousEndColumn) const {
-  unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
-  // If this is the last line, it will carry around its '*/' postfix.
-  unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0);
-  // The line is composed of previous text, reflow prefix, reflown text and
-  // postfix.
-  unsigned ReflownColumn = StartColumn +
-                           encoding::columnWidthWithTabs(
-                               Content, StartColumn, Style.TabWidth, Encoding) +
-                           PostfixLength;
-  return ReflownColumn;
-}
-
-unsigned BreakableBlockComment::getLineLengthAfterSplitBefore(
-    unsigned LineIndex, unsigned TailOffset, unsigned PreviousEndColumn,
-    unsigned ColumnLimit, Split SplitBefore) const {
-  if (SplitBefore.first == StringRef::npos ||
-      // Block comment line contents contain the trailing whitespace after the
-      // decoration, so the need of left trim. Note that this behavior is
-      // consistent with the breaking of block comments where the indentation of
-      // a broken line is uniform across all the lines of the block comment.
-      SplitBefore.first + SplitBefore.second <
-          Content[LineIndex].ltrim().size()) {
-    // A piece of line, not the whole, gets reflown.
-    return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
-  } else {
-    // The whole line gets reflown, need to check if we need to insert a break
-    // for the postfix or not.
-    StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
-    unsigned ReflownColumn =
-        getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
-    if (ReflownColumn <= ColumnLimit) {
-      return ReflownColumn;
-    }
-    return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
-  }
+  size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
+  return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
 }
 
 bool BreakableBlockComment::introducesBreakBeforeToken() const {
@@ -611,49 +552,39 @@ bool BreakableBlockComment::introducesBr
          Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
 }
 
-void BreakableBlockComment::replaceWhitespaceBefore(
-    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
-    Split SplitBefore, WhitespaceManager &Whitespaces) {
+void BreakableBlockComment::reflow(unsigned LineIndex,
+                                   WhitespaceManager &Whitespaces) const {
+  StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+  // Here we need to reflow.
+  assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
+         "Reflowing whitespace within a token");
+  // This is the offset of the end of the last line relative to the start of
+  // the token text in the token.
+  unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
+                                     Content[LineIndex - 1].size() -
+                                     tokenAt(LineIndex).TokenText.data();
+  unsigned WhitespaceLength = TrimmedContent.data() -
+                              tokenAt(LineIndex).TokenText.data() -
+                              WhitespaceOffsetInToken;
+  Whitespaces.replaceWhitespaceInToken(
+      tokenAt(LineIndex), WhitespaceOffsetInToken,
+      /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
+      /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
+      /*Spaces=*/0);
+}
+
+void BreakableBlockComment::adaptStartOfLine(
+    unsigned LineIndex, WhitespaceManager &Whitespaces) const {
   if (LineIndex == 0) {
     if (DelimitersOnNewline) {
-      // Since we're breaking af index 1 below, the break position and the
+      // Since we're breaking at index 1 below, the break position and the
       // break length are the same.
       size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
-      if (BreakLength != StringRef::npos) {
+      if (BreakLength != StringRef::npos)
         insertBreak(LineIndex, 0, Split(1, BreakLength), Whitespaces);
-        DelimitersOnNewline = true;
-      }
     }
     return;
   }
-  StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
-  if (SplitBefore.first != StringRef::npos) {
-    // Here we need to reflow.
-    assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
-           "Reflowing whitespace within a token");
-    // This is the offset of the end of the last line relative to the start of
-    // the token text in the token.
-    unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
-                                       Content[LineIndex - 1].size() -
-                                       tokenAt(LineIndex).TokenText.data();
-    unsigned WhitespaceLength = TrimmedContent.data() -
-                                tokenAt(LineIndex).TokenText.data() -
-                                WhitespaceOffsetInToken;
-    Whitespaces.replaceWhitespaceInToken(
-        tokenAt(LineIndex), WhitespaceOffsetInToken,
-        /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
-        /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
-        /*Spaces=*/0);
-    // Check if we need to also insert a break at the whitespace range.
-    // Note that we don't need a penalty for this break, since it doesn't change
-    // the total number of lines.
-    unsigned ReflownColumn =
-        getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
-    if (ReflownColumn > ColumnLimit)
-      insertBreak(LineIndex, 0, SplitBefore, Whitespaces);
-    return;
-  }
-
   // Here no reflow with the previous line will happen.
   // Fix the decoration of the line at LineIndex.
   StringRef Prefix = Decoration;
@@ -689,8 +620,7 @@ void BreakableBlockComment::replaceWhite
 }
 
 BreakableToken::Split
-BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset,
-                                             unsigned ColumnLimit) const {
+BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const {
   if (DelimitersOnNewline) {
     // Replace the trailing whitespace of the last line with a newline.
     // In case the last line is empty, the ending '*/' is already on its own
@@ -716,15 +646,6 @@ bool BreakableBlockComment::mayReflow(un
          !switchesFormatting(tokenAt(LineIndex));
 }
 
-unsigned
-BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
-                                             unsigned TailOffset) const {
-  // If we break, we always break at the predefined indent.
-  if (TailOffset != 0)
-    return IndentAtLineBreak;
-  return std::max(0, ContentColumn[LineIndex]);
-}
-
 BreakableLineCommentSection::BreakableLineCommentSection(
     const FormatToken &Token, unsigned StartColumn,
     unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
@@ -813,20 +734,25 @@ BreakableLineCommentSection::BreakableLi
   }
 }
 
-unsigned BreakableLineCommentSection::getLineLengthAfterSplit(
-    unsigned LineIndex, unsigned TailOffset,
-    StringRef::size_type Length) const {
-  unsigned ContentStartColumn =
-      (TailOffset == 0 ? ContentColumn[LineIndex]
-                       : OriginalContentColumn[LineIndex]);
-  return ContentStartColumn + encoding::columnWidthWithTabs(
-                                  Content[LineIndex].substr(TailOffset, Length),
-                                  ContentStartColumn, Style.TabWidth, Encoding);
+unsigned
+BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
+                                            StringRef::size_type Length,
+                                            unsigned StartColumn) const {
+  return encoding::columnWidthWithTabs(
+      Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
+      Encoding);
 }
 
-void BreakableLineCommentSection::insertBreak(unsigned LineIndex,
-                                              unsigned TailOffset, Split Split,
-                                              WhitespaceManager &Whitespaces) {
+unsigned BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
+                                                            bool Break) const {
+  if (Break)
+    return OriginalContentColumn[LineIndex];
+  return ContentColumn[LineIndex];
+}
+
+void BreakableLineCommentSection::insertBreak(
+    unsigned LineIndex, unsigned TailOffset, Split Split,
+    WhitespaceManager &Whitespaces) const {
   StringRef Text = Content[LineIndex].substr(TailOffset);
   // Compute the offset of the split relative to the beginning of the token
   // text.
@@ -845,34 +771,42 @@ void BreakableLineCommentSection::insert
       /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size());
 }
 
-BreakableComment::Split BreakableLineCommentSection::getSplitBefore(
-    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
-    llvm::Regex &CommentPragmasRegex) const {
+BreakableComment::Split BreakableLineCommentSection::getReflowSplit(
+    unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const {
   if (!mayReflow(LineIndex, CommentPragmasRegex))
     return Split(StringRef::npos, 0);
-  return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn,
-                        ColumnLimit);
-}
 
-unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore(
-    unsigned LineIndex, unsigned TailOffset, unsigned PreviousEndColumn,
-    unsigned ColumnLimit, Split SplitBefore) const {
-  if (SplitBefore.first == StringRef::npos ||
-      SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
-    // A piece of line, not the whole line, gets reflown.
-    return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
-  } else {
-    // The whole line gets reflown.
-    unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
-    return StartColumn +
-           encoding::columnWidthWithTabs(Content[LineIndex], StartColumn,
-                                         Style.TabWidth, Encoding);
-  }
+  size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
+
+  // In a line comment section each line is a separate token; thus, after a
+  // split we replace all whitespace before the current line comment token
+  // (which does not need to be included in the split), plus the start of the
+  // line up to where the content starts.
+  return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
+}
+
+void BreakableLineCommentSection::reflow(unsigned LineIndex,
+                                         WhitespaceManager &Whitespaces) const {
+  // Reflow happens between tokens. Replace the whitespace between the
+  // tokens by the empty string.
+  Whitespaces.replaceWhitespace(
+      *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
+      /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false);
+  // Replace the indent and prefix of the token with the reflow prefix.
+  unsigned WhitespaceLength =
+      Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
+  Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],
+                                       /*Offset=*/0,
+                                       /*ReplaceChars=*/WhitespaceLength,
+                                       /*PreviousPostfix=*/"",
+                                       /*CurrentPrefix=*/ReflowPrefix,
+                                       /*InPPDirective=*/false,
+                                       /*Newlines=*/0,
+                                       /*Spaces=*/0);
 }
 
-void BreakableLineCommentSection::replaceWhitespaceBefore(
-    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
-    Split SplitBefore, WhitespaceManager &Whitespaces) {
+void BreakableLineCommentSection::adaptStartOfLine(
+    unsigned LineIndex, WhitespaceManager &Whitespaces) const {
   // If this is the first line of a token, we need to inform Whitespace Manager
   // about it: either adapt the whitespace range preceding it, or mark it as an
   // untouchable token.
@@ -880,44 +814,25 @@ void BreakableLineCommentSection::replac
   // // line 1 \
   // // line 2
   if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
-    if (SplitBefore.first != StringRef::npos) {
-      // Reflow happens between tokens. Replace the whitespace between the
-      // tokens by the empty string.
-      Whitespaces.replaceWhitespace(
-          *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
-          /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false);
-      // Replace the indent and prefix of the token with the reflow prefix.
-      unsigned WhitespaceLength =
-          Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
-      Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],
-                                           /*Offset=*/0,
-                                           /*ReplaceChars=*/WhitespaceLength,
-                                           /*PreviousPostfix=*/"",
-                                           /*CurrentPrefix=*/ReflowPrefix,
-                                           /*InPPDirective=*/false,
-                                           /*Newlines=*/0,
-                                           /*Spaces=*/0);
-    } else {
-      // This is the first line for the current token, but no reflow with the
-      // previous token is necessary. However, we still may need to adjust the
-      // start column. Note that ContentColumn[LineIndex] is the expected
-      // content column after a possible update to the prefix, hence the prefix
-      // length change is included.
-      unsigned LineColumn =
-          ContentColumn[LineIndex] -
-          (Content[LineIndex].data() - Lines[LineIndex].data()) +
-          (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
-
-      // We always want to create a replacement instead of adding an untouchable
-      // token, even if LineColumn is the same as the original column of the
-      // token. This is because WhitespaceManager doesn't align trailing
-      // comments if they are untouchable.
-      Whitespaces.replaceWhitespace(*Tokens[LineIndex],
-                                    /*Newlines=*/1,
-                                    /*Spaces=*/LineColumn,
-                                    /*StartOfTokenColumn=*/LineColumn,
-                                    /*InPPDirective=*/false);
-    }
+    // This is the first line for the current token, but no reflow with the
+    // previous token is necessary. However, we still may need to adjust the
+    // start column. Note that ContentColumn[LineIndex] is the expected
+    // content column after a possible update to the prefix, hence the prefix
+    // length change is included.
+    unsigned LineColumn =
+        ContentColumn[LineIndex] -
+        (Content[LineIndex].data() - Lines[LineIndex].data()) +
+        (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
+
+    // We always want to create a replacement instead of adding an untouchable
+    // token, even if LineColumn is the same as the original column of the
+    // token. This is because WhitespaceManager doesn't align trailing
+    // comments if they are untouchable.
+    Whitespaces.replaceWhitespace(*Tokens[LineIndex],
+                                  /*Newlines=*/1,
+                                  /*Spaces=*/LineColumn,
+                                  /*StartOfTokenColumn=*/LineColumn,
+                                  /*InPPDirective=*/false);
   }
   if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
     // Adjust the prefix if necessary.
@@ -930,13 +845,6 @@ void BreakableLineCommentSection::replac
         tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "",
         /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
   }
-  // Add a break after a reflow split has been introduced, if necessary.
-  // Note that this break doesn't need to be penalized, since it doesn't change
-  // the number of lines.
-  if (SplitBefore.first != StringRef::npos &&
-      SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
-    insertBreak(LineIndex, 0, SplitBefore, Whitespaces);
-  }
 }
 
 void BreakableLineCommentSection::updateNextToken(LineState &State) const {
@@ -953,20 +861,17 @@ bool BreakableLineCommentSection::mayRef
   if (Lines[LineIndex].startswith("//")) {
     IndentContent = Lines[LineIndex].substr(2);
   }
+  // FIXME: Decide whether we want to reflow non-regular indents:
+  // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
+  // OriginalPrefix[LineIndex-1]. That means we don't reflow
+  // // text that protrudes
+  // //    into text with different indent
+  // We do reflow in that case in block comments.
   return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
          mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
          !switchesFormatting(tokenAt(LineIndex)) &&
          OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
 }
 
-unsigned
-BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
-                                                   unsigned TailOffset) const {
-  if (TailOffset != 0) {
-    return OriginalContentColumn[LineIndex];
-  }
-  return ContentColumn[LineIndex];
-}
-
 } // namespace format
 } // namespace clang

Modified: cfe/trunk/lib/Format/BreakableToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.h?rev=319314&r1=319313&r2=319314&view=diff
==============================================================================
--- cfe/trunk/lib/Format/BreakableToken.h (original)
+++ cfe/trunk/lib/Format/BreakableToken.h Wed Nov 29 06:29:43 2017
@@ -33,19 +33,32 @@ bool switchesFormatting(const FormatToke
 
 struct FormatStyle;
 
-/// \brief Base class for strategies on how to break tokens.
+/// \brief Base class for tokens / ranges of tokens that can allow breaking
+/// within the tokens - for example, to avoid whitespace beyond the column
+/// limit, or to reflow text.
 ///
-/// This is organised around the concept of a \c Split, which is a whitespace
-/// range that signifies a position of the content of a token where a
-/// reformatting might be done. Operating with splits is divided into 3
-/// operations:
+/// Generally, a breakable token consists of logical lines, addressed by a line
+/// index. For example, in a sequence of line comments, each line comment is its
+/// own logical line; similarly, for a block comment, each line in the block
+/// comment is on its own logical line.
+///
+/// There are two methods to compute the layout of the token:
+/// - getRangeLength measures the number of columns needed for a range of text
+///   within a logical line, and
+/// - getContentStartColumn returns the start column at which we want the
+///   content of a logical line to start (potentially after introducing a line
+///   break).
+///
+/// The mechanism to adapt the layout of the breakable token is organised
+/// around the concept of a \c Split, which is a whitespace range that signifies
+/// a position of the content of a token where a reformatting might be done.
+///
+/// Operating with splits is divided into two operations:
 /// - getSplit, for finding a split starting at a position,
-/// - getLineLengthAfterSplit, for calculating the size in columns of the rest
-///   of the content after a split has been used for breaking, and
 /// - insertBreak, for executing the split using a whitespace manager.
 ///
 /// There is a pair of operations that are used to compress a long whitespace
-/// range with a single space if that will bring the line lenght under the
+/// range with a single space if that will bring the line length under the
 /// column limit:
 /// - getLineLengthAfterCompression, for calculating the size in columns of the
 ///   line after a whitespace range has been compressed, and
@@ -56,14 +69,12 @@ struct FormatStyle;
 /// For tokens where the whitespace before each line needs to be also
 /// reformatted, for example for tokens supporting reflow, there are analogous
 /// operations that might be executed before the main line breaking occurs:
-/// - getSplitBefore, for finding a split such that the content preceding it
+/// - getReflowSplit, for finding a split such that the content preceding it
 ///   needs to be specially reflown,
+/// - reflow, for executing the split using a whitespace manager,
 /// - introducesBreakBefore, for checking if reformatting the beginning
 ///   of the content introduces a line break before it,
-/// - getLineLengthAfterSplitBefore, for calculating the line length in columns
-///   of the remainder of the content after the beginning of the content has
-///   been reformatted, and
-/// - replaceWhitespaceBefore, for executing the reflow using a whitespace
+/// - adaptStartOfLine, for executing the reflow using a whitespace
 ///   manager.
 ///
 /// For tokens that require the whitespace after the last line to be
@@ -72,13 +83,9 @@ struct FormatStyle;
 /// that might be executed after the last line has been reformatted:
 /// - getSplitAfterLastLine, for finding a split after the last line that needs
 ///   to be reflown,
-/// - getLineLengthAfterSplitAfterLastLine, for calculating the line length in
-///   columns of the remainder of the token, and
 /// - replaceWhitespaceAfterLastLine, for executing the reflow using a
 ///   whitespace manager.
 ///
-/// FIXME: The interface seems set in stone, so we might want to just pull the
-/// strategy into the class, instead of controlling it from the outside.
 class BreakableToken {
 public:
   /// \brief Contains starting character index and length of split.
@@ -89,79 +96,103 @@ public:
   /// \brief Returns the number of lines in this token in the original code.
   virtual unsigned getLineCount() const = 0;
 
-  /// \brief Returns the number of columns required to format the piece of line
-  /// at \p LineIndex, from byte offset \p TailOffset with length \p Length.
+  /// \brief Returns the number of columns required to format the text in the
+  /// byte range [\p Offset, \p Offset \c + \p Length).
+  ///
+  /// \p Offset is the byte offset from the start of the content of the line
+  ///    at \p LineIndex.
+  ///
+  /// \p StartColumn is the column at which the text starts in the formatted
+  ///    file, needed to compute tab stops correctly.
+  virtual unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+                                  StringRef::size_type Length,
+                                  unsigned StartColumn) const = 0;
+
+  /// \brief Returns the number of columns required to format the text following
+  /// the byte \p Offset in the line \p LineIndex, including potentially
+  /// unbreakable sequences of tokens following after the end of the token.
+  ///
+  /// \p Offset is the byte offset from the start of the content of the line
+  ///    at \p LineIndex.
+  ///
+  /// \p StartColumn is the column at which the text starts in the formatted
+  ///    file, needed to compute tab stops correctly.
+  ///
+  /// For breakable tokens that never use extra space at the end of a line, this
+  /// is equivalent to getRangeLength with a Length of StringRef::npos.
+  virtual unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
+                                      unsigned StartColumn) const {
+    return getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
+  }
+
+  /// \brief Returns the column at which content in line \p LineIndex starts,
+  /// assuming no reflow.
   ///
-  /// Note that previous breaks are not taken into account. \p TailOffset is
-  /// always specified from the start of the (original) line.
-  /// \p Length can be set to StringRef::npos, which means "to the end of line".
-  virtual unsigned
-  getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
-                          StringRef::size_type Length) const = 0;
+  /// If \p Break is true, returns the column at which the line should start
+  /// after the line break.
+  /// If \p Break is false, returns the column at which the line itself will
+  /// start.
+  virtual unsigned getContentStartColumn(unsigned LineIndex,
+                                         bool Break) const = 0;
 
   /// \brief Returns a range (offset, length) at which to break the line at
   /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
-  /// violate \p ColumnLimit.
+  /// violate \p ColumnLimit, assuming the text starting at \p TailOffset in
+  /// the token is formatted starting at ContentStartColumn in the reformatted
+  /// file.
   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
-                         unsigned ColumnLimit,
+                         unsigned ColumnLimit, unsigned ContentStartColumn,
                          llvm::Regex &CommentPragmasRegex) const = 0;
 
   /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           WhitespaceManager &Whitespaces) = 0;
+                           WhitespaceManager &Whitespaces) const = 0;
 
-  /// \brief Returns the number of columns required to format the piece of line
-  /// at \p LineIndex, from byte offset \p TailOffset after the whitespace range
-  /// \p Split has been compressed into a single space.
-  unsigned getLineLengthAfterCompression(unsigned RemainingTokenColumns,
-                                         Split Split) const;
+  /// \brief Returns the number of columns needed to format
+  /// \p RemainingTokenColumns, assuming that Split is within the range measured
+  /// by \p RemainingTokenColumns, and that the whitespace in Split is reduced
+  /// to a single space.
+  unsigned getLengthAfterCompression(unsigned RemainingTokenColumns,
+                                     Split Split) const;
 
   /// \brief Replaces the whitespace range described by \p Split with a single
   /// space.
   virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset,
                                   Split Split,
-                                  WhitespaceManager &Whitespaces) = 0;
+                                  WhitespaceManager &Whitespaces) const = 0;
 
-  /// \brief Returns a whitespace range (offset, length) of the content at
-  /// \p LineIndex such that the content preceding this range needs to be
-  /// reformatted before any breaks are made to this line.
+  /// \brief Returns whether the token supports reflowing text.
+  virtual bool supportsReflow() const { return false; }
+
+  /// \brief Returns a whitespace range (offset, length) of the content at \p
+  /// LineIndex such that the content of that line is reflown to the end of the
+  /// previous one.
   ///
-  /// \p PreviousEndColumn is the end column of the previous line after
-  /// formatting.
+  /// Returning (StringRef::npos, 0) indicates reflowing is not possible.
   ///
-  /// A result having offset == StringRef::npos means that no piece of the line
-  /// needs to be reformatted before any breaks are made.
-  virtual Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
-                               unsigned ColumnLimit,
+  /// The range will include any whitespace preceding the specified line's
+  /// content.
+  ///
+  /// If the split is not contained within one token, for example when reflowing
+  /// line comments, returns (0, <length>).
+  virtual Split getReflowSplit(unsigned LineIndex,
                                llvm::Regex &CommentPragmasRegex) const {
     return Split(StringRef::npos, 0);
   }
 
+  /// \brief Reflows the current line into the end of the previous one.
+  virtual void reflow(unsigned LineIndex,
+                      WhitespaceManager &Whitespaces) const {}
+
   /// \brief Returns whether there will be a line break at the start of the
   /// token.
   virtual bool introducesBreakBeforeToken() const {
     return false;
   }
 
-  /// \brief Returns the number of columns required to format the piece of line
-  /// at \p LineIndex after the content preceding the whitespace range specified
-  /// \p SplitBefore has been reformatted, but before any breaks are made to
-  /// this line.
-  virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
-                                                 unsigned TailOffset,
-                                                 unsigned PreviousEndColumn,
-                                                 unsigned ColumnLimit,
-                                                 Split SplitBefore) const {
-    return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
-  }
-
   /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
-  /// Performs a reformatting of the content at \p LineIndex preceding the
-  /// whitespace range \p SplitBefore.
-  virtual void replaceWhitespaceBefore(unsigned LineIndex,
-                                       unsigned PreviousEndColumn,
-                                       unsigned ColumnLimit, Split SplitBefore,
-                                       WhitespaceManager &Whitespaces) {}
+  virtual void adaptStartOfLine(unsigned LineIndex,
+                                WhitespaceManager &Whitespaces) const {}
 
   /// \brief Returns a whitespace range (offset, length) of the content at
   /// the last line that needs to be reformatted after the last line has been
@@ -169,28 +200,15 @@ public:
   ///
   /// A result having offset == StringRef::npos means that no reformat is
   /// necessary.
-  virtual Split getSplitAfterLastLine(unsigned TailOffset,
-                                      unsigned ColumnLimit) const {
+  virtual Split getSplitAfterLastLine(unsigned TailOffset) const {
     return Split(StringRef::npos, 0);
   }
 
-  /// \brief Returns the number of columns required to format the piece token
-  /// after the last line after a reformat of the whitespace range \p
-  /// \p SplitAfterLastLine on the last line has been performed.
-  virtual unsigned
-  getLineLengthAfterSplitAfterLastLine(unsigned TailOffset,
-                                       Split SplitAfterLastLine) const {
-    return getLineLengthAfterSplit(getLineCount() - 1,
-                                   TailOffset + SplitAfterLastLine.first +
-                                       SplitAfterLastLine.second,
-                                   StringRef::npos);
-  }
-
   /// \brief Replaces the whitespace from \p SplitAfterLastLine on the last line
   /// after the last line has been formatted by performing a reformatting.
-  virtual void replaceWhitespaceAfterLastLine(unsigned TailOffset,
-                                              Split SplitAfterLastLine,
-                                              WhitespaceManager &Whitespaces) {
+  void replaceWhitespaceAfterLastLine(unsigned TailOffset,
+                                      Split SplitAfterLastLine,
+                                      WhitespaceManager &Whitespaces) const {
     insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine,
                 Whitespaces);
   }
@@ -212,32 +230,7 @@ protected:
   const FormatStyle &Style;
 };
 
-/// \brief Base class for single line tokens that can be broken.
-///
-/// \c getSplit() needs to be implemented by child classes.
-class BreakableSingleLineToken : public BreakableToken {
-public:
-  unsigned getLineCount() const override;
-  unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
-                                   StringRef::size_type Length) const override;
-
-protected:
-  BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
-                           StringRef Prefix, StringRef Postfix,
-                           bool InPPDirective, encoding::Encoding Encoding,
-                           const FormatStyle &Style);
-
-  // The column in which the token starts.
-  unsigned StartColumn;
-  // The prefix a line needs after a break in the token.
-  StringRef Prefix;
-  // The postfix a line needs before introducing a break.
-  StringRef Postfix;
-  // The token text excluding the prefix and postfix.
-  StringRef Line;
-};
-
-class BreakableStringLiteral : public BreakableSingleLineToken {
+class BreakableStringLiteral : public BreakableToken {
 public:
   /// \brief Creates a breakable token for a single line string literal.
   ///
@@ -249,11 +242,32 @@ public:
                          const FormatStyle &Style);
 
   Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+                 unsigned ReflowColumn,
                  llvm::Regex &CommentPragmasRegex) const override;
   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                   WhitespaceManager &Whitespaces) override;
+                   WhitespaceManager &Whitespaces) const override;
   void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
-                          WhitespaceManager &Whitespaces) override {}
+                          WhitespaceManager &Whitespaces) const override {}
+  unsigned getLineCount() const override;
+  unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+                          StringRef::size_type Length,
+                          unsigned StartColumn) const override;
+  unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
+                              unsigned StartColumn) const override;
+  unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
+
+protected:
+  // The column in which the token starts.
+  unsigned StartColumn;
+  // The prefix a line needs after a break in the token.
+  StringRef Prefix;
+  // The postfix a line needs before introducing a break.
+  StringRef Postfix;
+  // The token text excluding the prefix and postfix.
+  StringRef Line;
+  // Length of the sequence of tokens after this string literal that cannot
+  // contain line breaks.
+  unsigned UnbreakableTailLength;
 };
 
 class BreakableComment : public BreakableToken {
@@ -267,21 +281,15 @@ protected:
                    const FormatStyle &Style);
 
 public:
+  bool supportsReflow() const override { return true; }
   unsigned getLineCount() const override;
   Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
+                 unsigned ReflowColumn,
                  llvm::Regex &CommentPragmasRegex) const override;
   void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
-                          WhitespaceManager &Whitespaces) override;
+                          WhitespaceManager &Whitespaces) const override;
 
 protected:
-  virtual unsigned getContentStartColumn(unsigned LineIndex,
-                                         unsigned TailOffset) const = 0;
-
-  // Returns a split that divides Text into a left and right parts, such that
-  // the left part is suitable for reflowing after PreviousEndColumn.
-  Split getReflowSplit(StringRef Text, StringRef ReflowPrefix,
-                       unsigned PreviousEndColumn, unsigned ColumnLimit) const;
-
   // Returns the token containing the line at LineIndex.
   const FormatToken &tokenAt(unsigned LineIndex) const;
 
@@ -340,24 +348,22 @@ public:
                         bool InPPDirective, encoding::Encoding Encoding,
                         const FormatStyle &Style);
 
-  unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
-                                   StringRef::size_type Length) const override;
+  unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+                          StringRef::size_type Length,
+                          unsigned StartColumn) const override;
+  unsigned getRemainingLength(unsigned LineIndex, unsigned Offset,
+                              unsigned StartColumn) const override;
+  unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                   WhitespaceManager &Whitespaces) override;
-  Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
-                       unsigned ColumnLimit,
+                   WhitespaceManager &Whitespaces) const override;
+  Split getReflowSplit(unsigned LineIndex,
                        llvm::Regex &CommentPragmasRegex) const override;
+  void reflow(unsigned LineIndex,
+              WhitespaceManager &Whitespaces) const override;
   bool introducesBreakBeforeToken() const override;
-  unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
-                                         unsigned TailOffset,
-                                         unsigned PreviousEndColumn,
-                                         unsigned ColumnLimit,
-                                         Split SplitBefore) const override;
-  void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
-                               unsigned ColumnLimit, Split SplitBefore,
-                               WhitespaceManager &Whitespaces) override;
-  Split getSplitAfterLastLine(unsigned TailOffset,
-                              unsigned ColumnLimit) const override;
+  void adaptStartOfLine(unsigned LineIndex,
+                        WhitespaceManager &Whitespaces) const override;
+  Split getSplitAfterLastLine(unsigned TailOffset) const override;
 
   bool mayReflow(unsigned LineIndex,
                  llvm::Regex &CommentPragmasRegex) const override;
@@ -373,14 +379,6 @@ private:
   // considered part of the text).
   void adjustWhitespace(unsigned LineIndex, int IndentDelta);
 
-  // Computes the end column if the full Content from LineIndex gets reflown
-  // after PreviousEndColumn.
-  unsigned getReflownColumn(StringRef Content, unsigned LineIndex,
-                            unsigned PreviousEndColumn) const;
-
-  unsigned getContentStartColumn(unsigned LineIndex,
-                                 unsigned TailOffset) const override;
-
   // The column at which the text of a broken line should start.
   // Note that an optional decoration would go before that column.
   // IndentAtLineBreak is a uniform position for all lines in a block comment,
@@ -416,29 +414,23 @@ public:
                               bool InPPDirective, encoding::Encoding Encoding,
                               const FormatStyle &Style);
 
-  unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
-                                   StringRef::size_type Length) const override;
+  unsigned getRangeLength(unsigned LineIndex, unsigned Offset,
+                          StringRef::size_type Length,
+                          unsigned StartColumn) const override;
+  unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override;
   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                   WhitespaceManager &Whitespaces) override;
-  Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
-                       unsigned ColumnLimit,
+                   WhitespaceManager &Whitespaces) const override;
+  Split getReflowSplit(unsigned LineIndex,
                        llvm::Regex &CommentPragmasRegex) const override;
-  unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
-                                         unsigned TailOffset,
-                                         unsigned PreviousEndColumn,
-                                         unsigned ColumnLimit,
-                                         Split SplitBefore) const override;
-  void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
-                               unsigned ColumnLimit, Split SplitBefore,
-                               WhitespaceManager &Whitespaces) override;
+  void reflow(unsigned LineIndex,
+              WhitespaceManager &Whitespaces) const override;
+  void adaptStartOfLine(unsigned LineIndex,
+                        WhitespaceManager &Whitespaces) const override;
   void updateNextToken(LineState &State) const override;
   bool mayReflow(unsigned LineIndex,
                  llvm::Regex &CommentPragmasRegex) const override;
 
 private:
-  unsigned getContentStartColumn(unsigned LineIndex,
-                                 unsigned TailOffset) const override;
-
   // OriginalPrefix[i] contains the original prefix of line i, including
   // trailing whitespace before the start of the content. The indentation
   // preceding the prefix is not included.

Modified: cfe/trunk/lib/Format/ContinuationIndenter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/ContinuationIndenter.cpp?rev=319314&r1=319313&r2=319314&view=diff
==============================================================================
--- cfe/trunk/lib/Format/ContinuationIndenter.cpp (original)
+++ cfe/trunk/lib/Format/ContinuationIndenter.cpp Wed Nov 29 06:29:43 2017
@@ -1484,23 +1484,25 @@ unsigned ContinuationIndenter::breakProt
                                                     LineState &State,
                                                     bool AllowBreak,
                                                     bool DryRun) {
-  std::unique_ptr<BreakableToken> Token =
+  std::unique_ptr<const BreakableToken> Token =
       createBreakableToken(Current, State, AllowBreak);
   if (!Token)
     return 0;
+  assert(Token->getLineCount() > 0);
   unsigned ColumnLimit = getColumnLimit(State);
-  unsigned StartColumn = State.Column - Current.ColumnWidth;
   if (Current.is(TT_LineComment)) {
     // We don't insert backslashes when breaking line comments.
     ColumnLimit = Style.ColumnLimit;
   }
   if (Current.UnbreakableTailLength >= ColumnLimit)
     return 0;
-
+  // ColumnWidth was already accounted into State.Column before calling
+  // breakProtrudingToken.
+  unsigned StartColumn = State.Column - Current.ColumnWidth;
   unsigned NewBreakPenalty = Current.isStringLiteral()
                                  ? Style.PenaltyBreakString
                                  : Style.PenaltyBreakComment;
-  unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
+  // Stores whether we introduce a break anywhere in the token.
   bool BreakInserted = Token->introducesBreakBeforeToken();
   // Store whether we inserted a new line break at the end of the previous
   // logical line.
@@ -1508,145 +1510,295 @@ unsigned ContinuationIndenter::breakProt
   // We use a conservative reflowing strategy. Reflow starts after a line is
   // broken or the corresponding whitespace compressed. Reflow ends as soon as a
   // line that doesn't get reflown with the previous line is reached.
-  bool ReflowInProgress = false;
-  unsigned Penalty = 0;
-  unsigned RemainingTokenColumns = 0;
+  bool Reflow = false;
+  // Keep track of where we are in the token:
+  // Where we are in the content of the current logical line.
   unsigned TailOffset = 0;
+  // The column number we're currently at.
+  unsigned ContentStartColumn =
+      Token->getContentStartColumn(0, /*Break=*/false);
+  // The number of columns left in the current logical line after TailOffset.
+  unsigned RemainingTokenColumns =
+      Token->getRemainingLength(0, TailOffset, ContentStartColumn);
+  // Adapt the start of the token, for example indent.
+  if (!DryRun)
+    Token->adaptStartOfLine(0, Whitespaces);
+
+  unsigned Penalty = 0;
   DEBUG(llvm::dbgs() << "Breaking protruding token at column " << StartColumn
                      << ".\n");
   for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
        LineIndex != EndIndex; ++LineIndex) {
-    DEBUG(llvm::dbgs() << "  Line: " << LineIndex
-                       << " (Reflow: " << ReflowInProgress << ")\n");
-    BreakableToken::Split SplitBefore(StringRef::npos, 0);
-    if (ReflowInProgress) {
-      SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns,
-                                          RemainingSpace, CommentPragmasRegex);
-    }
-    ReflowInProgress = SplitBefore.first != StringRef::npos;
-    DEBUG({
-      if (ReflowInProgress)
-        llvm::dbgs() << "  Reflowing.\n";
-    });
-    TailOffset =
-        ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0;
-    // If we found a reflow split and have added a new break before this line,
-    // we are going to remove the line break at the start of the next logical
-    // line.
-    // For example, here we'll add a new line break after 'text', and
-    // subsequently delete the line break between 'that' and 'reflows'.
-    //   // some text that
-    //   // reflows
-    // ->
-    //   // some text
-    //   // that reflows
-    // When adding the line break, we also added the penalty for it, so we need
-    // to subtract that penalty again when we remove the line break due to
-    // reflowing.
-    if (ReflowInProgress && NewBreakBefore) {
-      assert(Penalty >= NewBreakPenalty);
-      Penalty -= NewBreakPenalty;
-    }
+    DEBUG(llvm::dbgs() << "  Line: " << LineIndex << " (Reflow: " << Reflow
+                       << ")\n");
     NewBreakBefore = false;
-    if (!DryRun)
-      Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns,
-                                     RemainingSpace, SplitBefore, Whitespaces);
-    RemainingTokenColumns = Token->getLineLengthAfterSplitBefore(
-        LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore);
-    while (RemainingTokenColumns > RemainingSpace) {
-      DEBUG(llvm::dbgs() << "    Over limit, need: " << RemainingTokenColumns
-                         << ", space: " << RemainingSpace << "\n");
-      BreakableToken::Split Split = Token->getSplit(
-          LineIndex, TailOffset, ColumnLimit, CommentPragmasRegex);
+    // If we did reflow the previous line, we'll try reflowing again. Otherwise
+    // we'll start reflowing if the current line is broken or whitespace is
+    // compressed.
+    bool TryReflow = Reflow;
+    // Break the current token until we can fit the rest of the line.
+    while (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
+      DEBUG(llvm::dbgs() << "    Over limit, need: "
+                         << (ContentStartColumn + RemainingTokenColumns)
+                         << ", space: " << ColumnLimit
+                         << ", reflown prefix: " << ContentStartColumn
+                         << ", offset in line: " << TailOffset << "\n");
+      // If the current token doesn't fit, find the latest possible split in the
+      // current line so that breaking at it will be under the column limit.
+      // FIXME: Use the earliest possible split while reflowing to correctly
+      // compress whitespace within a line.
+      BreakableToken::Split Split =
+          Token->getSplit(LineIndex, TailOffset, ColumnLimit,
+                          ContentStartColumn, CommentPragmasRegex);
       if (Split.first == StringRef::npos) {
-        // The last line's penalty is handled in addNextStateToQueue().
+        // No break opportunity - update the penalty and continue with the next
+        // logical line.
         if (LineIndex < EndIndex - 1)
+          // The last line's penalty is handled in addNextStateToQueue().
           Penalty += Style.PenaltyExcessCharacter *
-                     (RemainingTokenColumns - RemainingSpace);
+                     (ContentStartColumn + RemainingTokenColumns - ColumnLimit);
         DEBUG(llvm::dbgs() << "    No break opportunity.\n");
         break;
       }
       assert(Split.first != 0);
 
-      // Check if compressing the whitespace range will bring the line length
-      // under the limit. If that is the case, we perform whitespace compression
-      // instead of inserting a line break.
-      unsigned RemainingTokenColumnsAfterCompression =
-          Token->getLineLengthAfterCompression(RemainingTokenColumns, Split);
-      if (RemainingTokenColumnsAfterCompression <= RemainingSpace) {
-        RemainingTokenColumns = RemainingTokenColumnsAfterCompression;
-        ReflowInProgress = true;
-        if (!DryRun)
-          Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);
-        DEBUG(llvm::dbgs() << "    Compressing below limit.\n");
-        break;
-      }
-
-      // Compute both the penalties for:
-      // - not breaking, and leaving excess characters
-      // - adding a new line break
-      assert(RemainingTokenColumnsAfterCompression > RemainingSpace);
-      unsigned ExcessCharactersPenalty =
-          (RemainingTokenColumnsAfterCompression - RemainingSpace) *
-          Style.PenaltyExcessCharacter;
-
-      unsigned BreakPenalty = NewBreakPenalty;
-      unsigned ColumnsUsed =
-          Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first);
-      if (ColumnsUsed > ColumnLimit)
-        BreakPenalty +=
-            Style.PenaltyExcessCharacter * (ColumnsUsed - ColumnLimit);
-
-      DEBUG(llvm::dbgs() << "    Penalty excess: " << ExcessCharactersPenalty
-                         << "\n            break : " << BreakPenalty << "\n");
-      // Only continue to add the line break if the penalty of the excess
-      // characters is larger than the penalty of the line break.
-      // FIXME: This does not take into account when we can later remove the
-      // line break again due to a reflow.
-      if (ExcessCharactersPenalty < BreakPenalty) {
-        if (!DryRun)
-          Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);
-        // Do not set ReflowInProgress: we do not have any space left to
-        // reflow into.
-        Penalty += ExcessCharactersPenalty;
-        break;
+      if (Token->supportsReflow()) {
+        // Check whether the next natural split point after the current one can
+        // still fit the line, either because we can compress away whitespace,
+        // or because the penalty the excess characters introduce is lower than
+        // the break penalty.
+        // We only do this for tokens that support reflowing, and thus allow us
+        // to change the whitespace arbitrarily (e.g. comments).
+        // Other tokens, like string literals, can be broken on arbitrary
+        // positions.
+
+        // First, compute the columns from TailOffset to the next possible split
+        // position.
+        // For example:
+        // ColumnLimit:     |
+        // // Some text   that    breaks
+        //    ^ tail offset
+        //             ^-- split
+        //    ^-------- to split columns
+        //                    ^--- next split
+        //    ^--------------- to next split columns
+        unsigned ToSplitColumns = Token->getRangeLength(
+            LineIndex, TailOffset, Split.first, ContentStartColumn);
+        DEBUG(llvm::dbgs() << "    ToSplit: " << ToSplitColumns << "\n");
+
+        BreakableToken::Split NextSplit = Token->getSplit(
+            LineIndex, TailOffset + Split.first + Split.second, ColumnLimit,
+            ContentStartColumn + ToSplitColumns + 1, CommentPragmasRegex);
+        // Compute the columns necessary to fit the next non-breakable sequence
+        // into the current line.
+        unsigned ToNextSplitColumns = 0;
+        if (NextSplit.first == StringRef::npos) {
+          ToNextSplitColumns = Token->getRemainingLength(LineIndex, TailOffset,
+                                                         ContentStartColumn);
+        } else {
+          ToNextSplitColumns = Token->getRangeLength(
+              LineIndex, TailOffset,
+              Split.first + Split.second + NextSplit.first, ContentStartColumn);
+        }
+        // Compress the whitespace between the break and the start of the next
+        // unbreakable sequence.
+        ToNextSplitColumns =
+            Token->getLengthAfterCompression(ToNextSplitColumns, Split);
+        DEBUG(llvm::dbgs() << "    ContentStartColumn: " << ContentStartColumn
+                           << "\n");
+        DEBUG(llvm::dbgs() << "    ToNextSplit: " << ToNextSplitColumns << "\n");
+        // If the whitespace compression makes us fit, continue on the current
+        // line.
+        bool ContinueOnLine =
+            ContentStartColumn + ToNextSplitColumns <= ColumnLimit;
+        unsigned ExcessCharactersPenalty = 0;
+        if (!ContinueOnLine) {
+          // Similarly, if the excess characters' penalty is lower than the
+          // penalty of introducing a new break, continue on the current line.
+          ExcessCharactersPenalty =
+              (ContentStartColumn + ToNextSplitColumns - ColumnLimit) *
+              Style.PenaltyExcessCharacter;
+          DEBUG(llvm::dbgs()
+                << "    Penalty excess: " << ExcessCharactersPenalty
+                << "\n            break : " << NewBreakPenalty << "\n");
+          if (ExcessCharactersPenalty < NewBreakPenalty)
+            ContinueOnLine = true;
+        }
+        if (ContinueOnLine) {
+          DEBUG(llvm::dbgs() << "    Continuing on line...\n");
+          // The current line fits after compressing the whitespace - reflow
+          // the next line into it if possible.
+          TryReflow = true;
+          if (!DryRun)
+            Token->compressWhitespace(LineIndex, TailOffset, Split,
+                                      Whitespaces);
+          // When we continue on the same line, leave one space between content.
+          ContentStartColumn += ToSplitColumns + 1;
+          Penalty += ExcessCharactersPenalty;
+          TailOffset += Split.first + Split.second;
+          RemainingTokenColumns = Token->getRemainingLength(
+              LineIndex, TailOffset, ContentStartColumn);
+          continue;
+        }
       }
-
-      unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
-          LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
+      DEBUG(llvm::dbgs() << "    Breaking...\n");
+      ContentStartColumn =
+          Token->getContentStartColumn(LineIndex, /*Break=*/true);
+      unsigned NewRemainingTokenColumns = Token->getRemainingLength(
+          LineIndex, TailOffset + Split.first + Split.second,
+          ContentStartColumn);
 
       // When breaking before a tab character, it may be moved by a few columns,
       // but will still be expanded to the next tab stop, so we don't save any
       // columns.
-      if (NewRemainingTokenColumns == RemainingTokenColumns)
+      if (NewRemainingTokenColumns == RemainingTokenColumns) {
         // FIXME: Do we need to adjust the penalty?
         break;
+      }
       assert(NewRemainingTokenColumns < RemainingTokenColumns);
 
+      DEBUG(llvm::dbgs() << "    Breaking at: " << TailOffset + Split.first
+                         << ", " << Split.second << "\n");
       if (!DryRun)
         Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces);
 
-      Penalty += BreakPenalty;
+      Penalty += NewBreakPenalty;
       TailOffset += Split.first + Split.second;
       RemainingTokenColumns = NewRemainingTokenColumns;
-      ReflowInProgress = true;
       BreakInserted = true;
       NewBreakBefore = true;
     }
+    // In case there's another line, prepare the state for the start of the next
+    // line.
+    if (LineIndex + 1 != EndIndex) {
+      unsigned NextLineIndex = LineIndex + 1;
+      if (NewBreakBefore)
+        // After breaking a line, try to reflow the next line into the current
+        // one once RemainingTokenColumns fits.
+        TryReflow = true;
+      if (TryReflow) {
+        // We decided that we want to try reflowing the next line into the
+        // current one.
+        // We will now adjust the state as if the reflow is successful (in
+        // preparation for the next line), and see whether that works. If we
+        // decide that we cannot reflow, we will later reset the state to the
+        // start of the next line.
+        Reflow = false;
+        // As we did not continue breaking the line, RemainingTokenColumns is
+        // known to fit after ContentStartColumn. Adapt ContentStartColumn to
+        // the position at which we want to format the next line if we do
+        // actually reflow.
+        // When we reflow, we need to add a space between the end of the current
+        // line and the next line's start column.
+        ContentStartColumn += RemainingTokenColumns + 1;
+        // Get the split that we need to reflow next logical line into the end
+        // of the current one; the split will include any leading whitespace of
+        // the next logical line.
+        BreakableToken::Split SplitBeforeNext =
+            Token->getReflowSplit(NextLineIndex, CommentPragmasRegex);
+        DEBUG(llvm::dbgs() << "    Size of reflown text: " << ContentStartColumn
+                           << "\n    Potential reflow split: ");
+        if (SplitBeforeNext.first != StringRef::npos) {
+          DEBUG(llvm::dbgs() << SplitBeforeNext.first << ", "
+                             << SplitBeforeNext.second << "\n");
+          TailOffset = SplitBeforeNext.first + SplitBeforeNext.second;
+          // If the rest of the next line fits into the current line below the
+          // column limit, we can safely reflow.
+          RemainingTokenColumns = Token->getRemainingLength(
+              NextLineIndex, TailOffset, ContentStartColumn);
+          Reflow = true;
+          if (ContentStartColumn + RemainingTokenColumns > ColumnLimit) {
+            DEBUG(llvm::dbgs() << "    Over limit after reflow, need: "
+                               << (ContentStartColumn + RemainingTokenColumns)
+                               << ", space: " << ColumnLimit
+                               << ", reflown prefix: " << ContentStartColumn
+                               << ", offset in line: " << TailOffset << "\n");
+            // If the whole next line does not fit, try to find a point in
+            // the next line at which we can break so that attaching the part
+            // of the next line to that break point onto the current line is
+            // below the column limit.
+            BreakableToken::Split Split =
+                Token->getSplit(NextLineIndex, TailOffset, ColumnLimit,
+                                ContentStartColumn, CommentPragmasRegex);
+            if (Split.first == StringRef::npos) {
+              DEBUG(llvm::dbgs() << "    Did not find later break\n");
+              Reflow = false;
+            } else {
+              // Check whether the first split point gets us below the column
+              // limit. Note that we will execute this split below as part of
+              // the normal token breaking and reflow logic within the line.
+              unsigned ToSplitColumns = Token->getRangeLength(
+                  NextLineIndex, TailOffset, Split.first, ContentStartColumn);
+              if (ContentStartColumn + ToSplitColumns > ColumnLimit) {
+                DEBUG(llvm::dbgs() << "    Next split protrudes, need: "
+                                   << (ContentStartColumn + ToSplitColumns)
+                                   << ", space: " << ColumnLimit);
+                unsigned ExcessCharactersPenalty =
+                    (ContentStartColumn + ToSplitColumns - ColumnLimit) *
+                    Style.PenaltyExcessCharacter;
+                if (NewBreakPenalty < ExcessCharactersPenalty) {
+                  Reflow = false;
+                }
+              }
+            }
+          }
+        } else {
+          DEBUG(llvm::dbgs() << "not found.\n");
+        }
+      }
+      if (!Reflow) {
+        // If we didn't reflow into the next line, the only space to consider is
+        // the next logical line. Reset our state to match the start of the next
+        // line.
+        TailOffset = 0;
+        ContentStartColumn =
+            Token->getContentStartColumn(NextLineIndex, /*Break=*/false);
+        RemainingTokenColumns = Token->getRemainingLength(
+            NextLineIndex, TailOffset, ContentStartColumn);
+        // Adapt the start of the token, for example indent.
+        if (!DryRun)
+          Token->adaptStartOfLine(NextLineIndex, Whitespaces);
+      } else {
+        // If we found a reflow split and have added a new break before the next
+        // line, we are going to remove the line break at the start of the next
+        // logical line. For example, here we'll add a new line break after
+        // 'text', and subsequently delete the line break between 'that' and
+        // 'reflows'.
+        //   // some text that
+        //   // reflows
+        // ->
+        //   // some text
+        //   // that reflows
+        // When adding the line break, we also added the penalty for it, so we
+        // need to subtract that penalty again when we remove the line break due
+        // to reflowing.
+        if (NewBreakBefore) {
+          assert(Penalty >= NewBreakPenalty);
+          Penalty -= NewBreakPenalty;
+        }
+        if (!DryRun)
+          Token->reflow(NextLineIndex, Whitespaces);
+      }
+    }
   }
 
   BreakableToken::Split SplitAfterLastLine =
-      Token->getSplitAfterLastLine(TailOffset, ColumnLimit);
+      Token->getSplitAfterLastLine(TailOffset);
   if (SplitAfterLastLine.first != StringRef::npos) {
     DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n");
     if (!DryRun)
       Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine,
                                             Whitespaces);
-    RemainingTokenColumns = Token->getLineLengthAfterSplitAfterLastLine(
-        TailOffset, SplitAfterLastLine);
+    ContentStartColumn =
+        Token->getContentStartColumn(Token->getLineCount() - 1, /*Break=*/true);
+    RemainingTokenColumns = Token->getRemainingLength(
+        Token->getLineCount() - 1,
+        TailOffset + SplitAfterLastLine.first + SplitAfterLastLine.second,
+        ContentStartColumn);
   }
 
-  State.Column = RemainingTokenColumns;
+  State.Column = ContentStartColumn + RemainingTokenColumns -
+                 Current.UnbreakableTailLength;
 
   if (BreakInserted) {
     // If we break the token inside a parameter list, we need to break before

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=319314&r1=319313&r2=319314&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Wed Nov 29 06:29:43 2017
@@ -7732,6 +7732,12 @@ TEST_F(FormatTest, BreaksStringLiterals)
             format("#define A \"some text other\";", AlignLeft));
 }
 
+TEST_F(FormatTest, BreaksStringLiteralsAtColumnLimit) {
+  EXPECT_EQ("C a = \"some more \"\n"
+            "      \"text\";",
+            format("C a = \"some more text\";", getLLVMStyleWithColumns(18)));
+}
+
 TEST_F(FormatTest, FullyRemoveEmptyLines) {
   FormatStyle NoEmptyLines = getLLVMStyleWithColumns(80);
   NoEmptyLines.MaxEmptyLinesToKeep = 0;
@@ -9927,16 +9933,9 @@ TEST_F(FormatTest, OptimizeBreakPenaltyV
 
   Style.PenaltyExcessCharacter = 90;
   verifyFormat("int a; // the comment", Style);
-  EXPECT_EQ("int a; // the\n"
-            "       // comment aa",
+  EXPECT_EQ("int a; // the comment\n"
+            "       // aa",
             format("int a; // the comment aa", Style));
-  EXPECT_EQ("int a; // first line\n"
-            "       // second line\n"
-            "       // third line",
-            format("int a; // first line\n"
-                   "       // second line\n"
-                   "       // third line",
-                   Style));
   EXPECT_EQ("int a; /* first line\n"
             "        * second line\n"
             "        * third line\n"
@@ -9946,12 +9945,18 @@ TEST_F(FormatTest, OptimizeBreakPenaltyV
                    "        * third line\n"
                    "        */",
                    Style));
+  EXPECT_EQ("int a; // first line\n"
+            "       // second line\n"
+            "       // third line",
+            format("int a; // first line\n"
+                   "       // second line\n"
+                   "       // third line",
+                   Style));
   // FIXME: Investigate why this is not getting the same layout as the test
   // above.
   EXPECT_EQ("int a; /* first line\n"
-            "        * second\n"
-            "        * line third\n"
-            "        * line\n"
+            "        * second line\n"
+            "        * third line\n"
             "        */",
             format("int a; /* first line second line third line"
                    "\n*/",
@@ -9970,31 +9975,23 @@ TEST_F(FormatTest, OptimizeBreakPenaltyV
 
   // FIXME: Optimally, we'd keep bazfoo on the first line and reflow bar to the
   // next one.
-  EXPECT_EQ("// foo bar baz\n"
-            "// bazfoo bar foo\n"
-            "// bar\n",
+  EXPECT_EQ("// foo bar baz bazfoo\n"
+            "// bar foo bar\n",
             format("// foo bar baz      bazfoo bar\n"
                    "// foo            bar\n",
                    Style));
 
   EXPECT_EQ("// foo bar baz bazfoo\n"
-            "// foo bar baz\n"
-            "// bazfoo bar foo\n"
-            "// bar\n",
+            "// foo bar baz bazfoo\n"
+            "// bar foo bar\n",
             format("// foo bar baz      bazfoo\n"
                    "// foo bar baz      bazfoo bar\n"
                    "// foo bar\n",
                    Style));
 
-  // FIXME: Optimally, we'd keep 'bar' in the last line at the end of the line,
-  // as it does not actually protrude far enough to make breaking pay off.
-  // Unfortunately, due to how reflowing is currently implemented, we already
-  // check the column limit after the reflowing decision and extend the reflow
-  // range, so we will not take whitespace compression into account.
   EXPECT_EQ("// foo bar baz bazfoo\n"
-            "// foo bar baz\n"
-            "// bazfoo bar foo\n"
-            "// bar\n",
+            "// foo bar baz bazfoo\n"
+            "// bar foo bar\n",
             format("// foo bar baz      bazfoo\n"
                    "// foo bar baz      bazfoo bar\n"
                    "// foo           bar\n",
@@ -10595,10 +10592,12 @@ TEST_F(FormatTest, SplitsUTF8Strings) {
       "\"ä¸ å« ä¹ \"\n"
       "\"å\"",
       format("\"ä¸ äº ä¸ å äºå ä¸ å« ä¹ å\"", getLLVMStyleWithColumns(11)));
-  EXPECT_EQ("\"ä¸\täº \"\n"
-            "\"\tä¸ \"\n"
-            "\"å äº\tå \"\n"
-            "\"\tä¸ \"\n"
+  EXPECT_EQ("\"ä¸\t\"\n"
+            "\"äº \t\"\n"
+            "\"ä¸ å \"\n"
+            "\"äº\t\"\n"
+            "\"å \t\"\n"
+            "\"ä¸ \"\n"
             "\"å«ä¹å\tqq\"",
             format("\"ä¸\täº \tä¸ å äº\tå \tä¸ å«ä¹å\tqq\"",
                    getLLVMStyleWithColumns(11)));

Modified: cfe/trunk/unittests/Format/FormatTestComments.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTestComments.cpp?rev=319314&r1=319313&r2=319314&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTestComments.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTestComments.cpp Wed Nov 29 06:29:43 2017
@@ -1096,11 +1096,12 @@ TEST_F(FormatTestComments, KeepsLevelOfC
 }
 
 TEST_F(FormatTestComments, SplitsLongLinesInComments) {
+  // FIXME: Do we need to fix up the "  */" at the end?
+  // It doesn't look like any of our current logic triggers this.
   EXPECT_EQ("/* This is a long\n"
             " * comment that\n"
-            " * doesn't\n"
-            " * fit on one line.\n"
-            " */",
+            " * doesn't fit on\n"
+            " * one line.  */",
             format("/* "
                    "This is a long                                         "
                    "comment that "
@@ -2102,6 +2103,85 @@ TEST_F(FormatTestComments, ReflowsCommen
   EXPECT_EQ("///", format(" ///  ", getLLVMStyleWithColumns(20)));
 }
 
+TEST_F(FormatTestComments, ReflowsCommentsPrecise) {
+  // FIXME: This assumes we do not continue compressing whitespace once we are
+  // in reflow mode. Consider compressing whitespace.
+
+  // Test that we stop reflowing precisely at the column limit.
+  // After reflowing, "// reflows into   foo" does not fit the column limit,
+  // so we compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+            "// reflows into foo\n",
+            format("// some text that reflows\n"
+                   "// into   foo\n",
+                   getLLVMStyleWithColumns(20)));
+  // Given one more column, "// reflows into   foo" does fit the limit, so we
+  // do not compress the whitespace.
+  EXPECT_EQ("// some text that\n"
+            "// reflows into   foo\n",
+            format("// some text that reflows\n"
+                   "// into   foo\n",
+                   getLLVMStyleWithColumns(21)));
+
+  // Make sure that we correctly account for the space added in the reflow case
+  // when making the reflowing decision.
+  // First, when the next line ends precisely one column over the limit, do not
+  // reflow.
+  EXPECT_EQ("// some text that\n"
+            "// reflows\n"
+            "// into1234567\n",
+            format("// some text that reflows\n"
+                   "// into1234567\n",
+                   getLLVMStyleWithColumns(21)));
+  // Secondly, when the next line ends later, but the first word in that line
+  // is precisely one column over the limit, do not reflow.
+  EXPECT_EQ("// some text that\n"
+            "// reflows\n"
+            "// into1234567 f\n",
+            format("// some text that reflows\n"
+                   "// into1234567 f\n",
+                   getLLVMStyleWithColumns(21)));
+}
+
+TEST_F(FormatTestComments, ReflowsCommentsWithExtraWhitespace) {
+  // Baseline.
+  EXPECT_EQ("// some text\n"
+            "// that re flows\n",
+            format("// some text that\n"
+                   "// re flows\n",
+                   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("// some text\n"
+            "// that re flows\n",
+            format("// some text that\n"
+                   "// re    flows\n",
+                   getLLVMStyleWithColumns(16)));
+  EXPECT_EQ("/* some text\n"
+            " * that re flows\n"
+            " */\n",
+            format("/* some text that\n"
+                   "*      re       flows\n"
+                   "*/\n",
+                   getLLVMStyleWithColumns(16)));
+  // FIXME: We do not reflow if the indent of two subsequent lines differs;
+  // given that this is different behavior from block comments, do we want
+  // to keep this?
+  EXPECT_EQ("// some text\n"
+            "// that\n"
+            "//     re flows\n",
+            format("// some text that\n"
+                   "//     re       flows\n",
+                   getLLVMStyleWithColumns(16)));
+  // Space within parts of a line that fit.
+  // FIXME: Use the earliest possible split while reflowing to compress the
+  // whitespace within the line.
+  EXPECT_EQ("// some text that\n"
+            "// does re   flow\n"
+            "// more  here\n",
+            format("// some text that does\n"
+                   "// re   flow  more  here\n",
+                   getLLVMStyleWithColumns(21)));
+}
+
 TEST_F(FormatTestComments, IgnoresIf0Contents) {
   EXPECT_EQ("#if 0\n"
             "}{)(&*(^%%#%@! fsadj f;ldjs ,:;| <<<>>>][)(][\n"
@@ -2484,6 +2564,7 @@ TEST_F(FormatTestComments, BreaksAfterMu
       "         long */\n"
       "      b);",
       format("a = f(a, /* long long */ b);", getLLVMStyleWithColumns(16)));
+
   EXPECT_EQ(
       "a = f(\n"
       "    a,\n"
@@ -2888,7 +2969,7 @@ TEST_F(FormatTestComments, AlignsBlockCo
                    getLLVMStyleWithColumns(20)));
 }
 
-TEST_F(FormatTestComments, NoCrush_Bug34236) {
+TEST_F(FormatTestComments, NoCrash_Bug34236) {
   // This is a test case from a crasher reported in:
   // https://bugs.llvm.org/show_bug.cgi?id=34236
   // Temporarily disable formatting for readability.
@@ -2896,8 +2977,7 @@ TEST_F(FormatTestComments, NoCrush_Bug34
   EXPECT_EQ(
 "/*                                                                */ /*\n"
 "                                                                      *       a\n"
-"                                                                      * b c\n"
-"                                                                      * d*/",
+"                                                                      * b c d*/",
       format(
 "/*                                                                */ /*\n"
 " *       a b\n"