r293055 - [clang-format] Implement comment reflowing.

Wed Jan 25 05:59:00 PST 2017

Author: krasimir
Date: Wed Jan 25 07:58:58 2017
New Revision: 293055

URL: http://llvm.org/viewvc/llvm-project?rev=293055&view=rev
Log:
[clang-format] Implement comment reflowing.

Summary:
This presents a version of the comment reflowing with less mutable state inside
the comment breakable token subclasses. The state has been pushed into the
driving breakProtrudingToken method. For this, the API of BreakableToken is enriched
by the methods getSplitBefore and getLineLengthAfterSplitBefore.

Reviewers: klimek

Reviewed By: klimek

Subscribers: djasper, klimek, mgorny, cfe-commits, ioeric

Differential Revision: https://reviews.llvm.org/D28764

Removed:
    cfe/trunk/lib/Format/Comments.cpp
    cfe/trunk/lib/Format/Comments.h
Modified:
    cfe/trunk/lib/Format/BreakableToken.cpp
    cfe/trunk/lib/Format/BreakableToken.h
    cfe/trunk/lib/Format/CMakeLists.txt
    cfe/trunk/lib/Format/ContinuationIndenter.cpp
    cfe/trunk/lib/Format/TokenAnnotator.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.cpp
    cfe/trunk/lib/Format/WhitespaceManager.cpp
    cfe/trunk/unittests/Format/FormatTest.cpp
    cfe/trunk/unittests/Format/FormatTestSelective.cpp

Modified: cfe/trunk/lib/Format/BreakableToken.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=293055&r1=293054&r2=293055&view=diff
==============================================================================

--- cfe/trunk/lib/Format/BreakableToken.cpp (original)
+++ cfe/trunk/lib/Format/BreakableToken.cpp Wed Jan 25 07:58:58 2017
@@ -14,7 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "BreakableToken.h"
-#include "Comments.h"
+#include "ContinuationIndenter.h"
 #include "clang/Basic/CharInfo.h"
 #include "clang/Format/Format.h"
 #include "llvm/ADT/STLExtras.h"
@@ -40,6 +40,21 @@ static bool IsBlank(char C) {
   }
 }
 
+static StringRef getLineCommentIndentPrefix(StringRef Comment) {
+  static const char *const KnownPrefixes[] = {"///", "//", "//!"};
+  StringRef LongestPrefix;
+  for (StringRef KnownPrefix : KnownPrefixes) {
+    if (Comment.startswith(KnownPrefix)) {
+      size_t PrefixLength = KnownPrefix.size();
+      while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ')
+        ++PrefixLength;
+      if (PrefixLength > LongestPrefix.size())
+        LongestPrefix = Comment.substr(0, PrefixLength);
+    }
+  }
+  return LongestPrefix;
+}
+
 static BreakableToken::Split getCommentSplit(StringRef Text,
                                              unsigned ContentStartColumn,
                                              unsigned ColumnLimit,
@@ -132,12 +147,35 @@ getStringSplit(StringRef Text, unsigned
   return BreakableToken::Split(StringRef::npos, 0);
 }
 
+bool switchesFormatting(const FormatToken &Token) {
+  assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
+         "formatting regions are switched by comment tokens");
+  StringRef Content = Token.TokenText.substr(2).ltrim();
+  return Content.startswith("clang-format on") ||
+         Content.startswith("clang-format off");
+}
+
+unsigned
+BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns,
+                                              Split Split) const {
+  // Example: consider the content
+  // lala  lala
+  // - RemainingTokenColumns is the original number of columns, 10;
+  // - Split is (4, 2), denoting the two spaces between the two words;
+  //
+  // We compute the number of columns when the split is compressed into a single
+  // space, like:
+  // lala lala
+  return RemainingTokenColumns + 1 - Split.second;
+}
+
 unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
 
 unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
-    unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
+    unsigned LineIndex, unsigned TailOffset,
+    StringRef::size_type Length) const {
   return StartColumn + Prefix.size() + Postfix.size() +
-         encoding::columnWidthWithTabs(Line.substr(Offset, Length),
+         encoding::columnWidthWithTabs(Line.substr(TailOffset, Length),
                                        StartColumn + Prefix.size(),
                                        Style.TabWidth, Encoding);
 }
@@ -183,71 +221,123 @@ void BreakableStringLiteral::insertBreak
       Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces);
 }
 
-BreakableLineComment::BreakableLineComment(
-    const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
-    bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style)
-    : BreakableSingleLineToken(Token, IndentLevel, StartColumn,
-                               getLineCommentIndentPrefix(Token.TokenText), "",
-                               InPPDirective, Encoding, Style) {
-  OriginalPrefix = Prefix;
-  if (Token.TokenText.size() > Prefix.size() &&
-      isAlphanumeric(Token.TokenText[Prefix.size()])) {
-    if (Prefix == "//")
-      Prefix = "// ";
-    else if (Prefix == "///")
-      Prefix = "/// ";
-    else if (Prefix == "//!")
-      Prefix = "//! ";
-  }
-}
-
-BreakableToken::Split
-BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
-                               unsigned ColumnLimit) const {
-  return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
+BreakableComment::BreakableComment(const FormatToken &Token,
+                                   unsigned IndentLevel, unsigned StartColumn,
+                                   unsigned OriginalStartColumn,
+                                   bool FirstInLine, bool InPPDirective,
+                                   encoding::Encoding Encoding,
+                                   const FormatStyle &Style)
+    : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style),
+      StartColumn(StartColumn), OriginalStartColumn(OriginalStartColumn),
+      FirstInLine(FirstInLine) {}
+
+unsigned BreakableComment::getLineCount() const { return Lines.size(); }
+
+BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex,
+                                                 unsigned TailOffset,
+                                                 unsigned ColumnLimit) const {
+  return getCommentSplit(Content[LineIndex].substr(TailOffset),
+                         getContentStartColumn(LineIndex, TailOffset),
                          ColumnLimit, Style.TabWidth, Encoding);
 }
 
-void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
-                                       Split Split,
-                                       WhitespaceManager &Whitespaces) {
-  Whitespaces.replaceWhitespaceInToken(
-      Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second,
-      Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn);
-}
-
-void BreakableLineComment::replaceWhitespace(unsigned LineIndex,
-                                             unsigned TailOffset, Split Split,
-                                             WhitespaceManager &Whitespaces) {
+void BreakableComment::compressWhitespace(unsigned LineIndex,
+                                          unsigned TailOffset, Split Split,
+                                          WhitespaceManager &Whitespaces) {
+  StringRef Text = Content[LineIndex].substr(TailOffset);
+  // Text is relative to the content line, but Whitespaces operates relative to
+  // the start of the corresponding token, so compute the start of the Split
+  // that needs to be compressed into a single space relative to the start of
+  // its token.
+  unsigned BreakOffsetInToken =
+      Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
+  unsigned CharsToRemove = Split.second;
   Whitespaces.replaceWhitespaceInToken(
-      Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "",
-      "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0,
-      /*Spaces=*/1);
+      tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
+      /*InPPDirective=*/false,
+      /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1);
 }
 
-void BreakableLineComment::replaceWhitespaceBefore(
-    unsigned LineIndex, WhitespaceManager &Whitespaces) {
-  if (OriginalPrefix != Prefix) {
-    Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "",
-                                         /*InPPDirective=*/false,
-                                         /*Newlines=*/0, /*IndentLevel=*/0,
-                                         /*Spaces=*/1);
-  }
+BreakableToken::Split
+BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix,
+                                 unsigned PreviousEndColumn,
+                                 unsigned ColumnLimit) const {
+  unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size();
+  StringRef TrimmedText = Text.rtrim(Blanks);
+  // This is the width of the resulting line in case the full line of Text gets
+  // reflown up starting at ReflowStartColumn.
+  unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs(
+                                               TrimmedText, ReflowStartColumn,
+                                               Style.TabWidth, Encoding);
+  // If the full line fits up, we return a reflow split after it,
+  // otherwise we compute the largest piece of text that fits after
+  // ReflowStartColumn.
+  Split ReflowSplit =
+      FullWidth <= ColumnLimit
+          ? Split(TrimmedText.size(), Text.size() - TrimmedText.size())
+          : getCommentSplit(Text, ReflowStartColumn, ColumnLimit,
+                            Style.TabWidth, Encoding);
+
+  // We need to be extra careful here, because while it's OK to keep a long line
+  // if it can't be broken into smaller pieces (like when the first word of a
+  // long line is longer than the column limit), it's not OK to reflow that long
+  // word up. So we recompute the size of the previous line after reflowing and
+  // only return the reflow split if that's under the line limit.
+  if (ReflowSplit.first != StringRef::npos &&
+      // Check if the width of the newly reflown line is under the limit.
+      PreviousEndColumn + ReflowPrefix.size() +
+              encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first),
+                                            PreviousEndColumn +
+                                                ReflowPrefix.size(),
+                                            Style.TabWidth, Encoding) <=
+          ColumnLimit) {
+    return ReflowSplit;
+  }
+  return Split(StringRef::npos, 0);
+}
+
+const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
+  return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
+}
+
+static bool mayReflowContent(StringRef Content) {
+  Content = Content.trim(Blanks);
+  // Simple heuristic for what to reflow: content should contain at least two
+  // characters and either the first or second character must be
+  // non-punctuation.
+  return Content.size() >= 2 && !Content.endswith("\\") &&
+         // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
+         // true, then the first code point must be 1 byte long.
+         (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
+}
+
+bool BreakableComment::mayReflow(unsigned LineIndex) const {
+  return LineIndex > 0 && mayReflowContent(Content[LineIndex]) &&
+         !Tok.Finalized && !switchesFormatting(tokenAt(LineIndex)) &&
+         (!Tok.is(TT_LineComment) ||
+          OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]);
 }
 
 BreakableBlockComment::BreakableBlockComment(
     const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
     unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
     encoding::Encoding Encoding, const FormatStyle &Style)
-    : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) {
-  StringRef TokenText(Token.TokenText);
+    : BreakableComment(Token, IndentLevel, StartColumn, OriginalStartColumn,
+                       FirstInLine, InPPDirective, Encoding, Style) {
+  assert(Tok.is(TT_BlockComment) &&
+         "block comment section must start with a block comment");
+
+  StringRef TokenText(Tok.TokenText);
   assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
   TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
 
   int IndentDelta = StartColumn - OriginalStartColumn;
-  LeadingWhitespace.resize(Lines.size());
-  StartOfLineColumn.resize(Lines.size());
-  StartOfLineColumn[0] = StartColumn + 2;
+  Content.resize(Lines.size());
+  Content[0] = Lines[0];
+  ContentColumn.resize(Lines.size());
+  // Account for the initial '/*'.
+  ContentColumn[0] = StartColumn + 2;
+  Tokens.resize(Lines.size());
   for (size_t i = 1; i < Lines.size(); ++i)
     adjustWhitespace(i, IndentDelta);
 
@@ -262,19 +352,20 @@ BreakableBlockComment::BreakableBlockCom
   }
   for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) {
     // If the last line is empty, the closing "*/" will have a star.
-    if (i + 1 == e && Lines[i].empty())
+    if (i + 1 == e && Content[i].empty())
       break;
-    if (!Lines[i].empty() && i + 1 != e && Decoration.startswith(Lines[i]))
+    if (!Content[i].empty() && i + 1 != e &&
+        Decoration.startswith(Content[i]))
       continue;
-    while (!Lines[i].startswith(Decoration))
+    while (!Content[i].startswith(Decoration))
       Decoration = Decoration.substr(0, Decoration.size() - 1);
   }
 
   LastLineNeedsDecoration = true;
-  IndentAtLineBreak = StartOfLineColumn[0] + 1;
-  for (size_t i = 1; i < Lines.size(); ++i) {
-    if (Lines[i].empty()) {
-      if (i + 1 == Lines.size()) {
+  IndentAtLineBreak = ContentColumn[0] + 1;
+  for (size_t i = 1, e = Lines.size(); i < e; ++i) {
+    if (Content[i].empty()) {
+      if (i + 1 == e) {
         // Empty last line means that we already have a star as a part of the
         // trailing */. We also need to preserve whitespace, so that */ is
         // correctly indented.
@@ -282,7 +373,7 @@ BreakableBlockComment::BreakableBlockCom
       } else if (Decoration.empty()) {
         // For all other lines, set the start column to 0 if they're empty, so
         // we do not insert trailing whitespace anywhere.
-        StartOfLineColumn[i] = 0;
+        ContentColumn[i] = 0;
       }
       continue;
     }
@@ -290,21 +381,23 @@ BreakableBlockComment::BreakableBlockCom
     // The first line already excludes the star.
     // For all other lines, adjust the line to exclude the star and
     // (optionally) the first whitespace.
-    unsigned DecorationSize =
-        Decoration.startswith(Lines[i]) ? Lines[i].size() : Decoration.size();
-    StartOfLineColumn[i] += DecorationSize;
-    Lines[i] = Lines[i].substr(DecorationSize);
-    LeadingWhitespace[i] += DecorationSize;
-    if (!Decoration.startswith(Lines[i]))
+    unsigned DecorationSize = Decoration.startswith(Content[i])
+                                  ? Content[i].size()
+                                  : Decoration.size();
+    ContentColumn[i] += DecorationSize;
+    Content[i] = Content[i].substr(DecorationSize);
+    if (!Decoration.startswith(Content[i]))
       IndentAtLineBreak =
-          std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i]));
+          std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
   }
-  IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
+  IndentAtLineBreak =
+      std::max<unsigned>(IndentAtLineBreak, Decoration.size());
+
   DEBUG({
     llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
     for (size_t i = 0; i < Lines.size(); ++i) {
-      llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
-                   << "\n";
+      llvm::dbgs() << i << " |" << Content[i] << "| "
+                   << (Content[i].data() - Lines[i].data()) << "\n";
     }
   });
 }
@@ -334,78 +427,157 @@ void BreakableBlockComment::adjustWhites
 
   StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
   // Adjust Lines to only contain relevant text.
-  Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
-  Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
-  // Adjust LeadingWhitespace to account all whitespace between the lines
-  // to the current line.
-  LeadingWhitespace[LineIndex] =
-      Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
+  size_t PreviousContentOffset =
+      Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
+  Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
+      PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
+  Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
 
   // Adjust the start column uniformly across all lines.
-  StartOfLineColumn[LineIndex] =
+  ContentColumn[LineIndex] =
       encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
       IndentDelta;
 }
 
-unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
-
 unsigned BreakableBlockComment::getLineLengthAfterSplit(
-    unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const {
-  unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset);
-  return ContentStartColumn +
-         encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length),
-                                       ContentStartColumn, Style.TabWidth,
-                                       Encoding) +
-         // The last line gets a "*/" postfix.
-         (LineIndex + 1 == Lines.size() ? 2 : 0);
-}
-
-BreakableToken::Split
-BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
-                                unsigned ColumnLimit) const {
-  return getCommentSplit(Lines[LineIndex].substr(TailOffset),
-                         getContentStartColumn(LineIndex, TailOffset),
-                         ColumnLimit, Style.TabWidth, Encoding);
+    unsigned LineIndex, unsigned TailOffset,
+    StringRef::size_type Length) const {
+  unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
+  unsigned LineLength =
+      ContentStartColumn + encoding::columnWidthWithTabs(
+                               Content[LineIndex].substr(TailOffset, Length),
+                               ContentStartColumn, Style.TabWidth, Encoding);
+  // The last line gets a "*/" postfix.
+  if (LineIndex + 1 == Lines.size()) {
+    LineLength += 2;
+    // We never need a decoration when breaking just the trailing "*/" postfix.
+    // Note that checking that Length == 0 is not enough, since Length could
+    // also be StringRef::npos.
+    if (Content[LineIndex].substr(TailOffset, Length).empty()) {
+      LineLength -= Decoration.size();
+    }
+  }
+  return LineLength;
 }
 
 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
                                         Split Split,
                                         WhitespaceManager &Whitespaces) {
-  StringRef Text = Lines[LineIndex].substr(TailOffset);
+  StringRef Text = Content[LineIndex].substr(TailOffset);
   StringRef Prefix = Decoration;
+  // We need this to account for the case when we have a decoration "* " for all
+  // the lines except for the last one, where the star in "*/" acts as a
+  // decoration.
+  unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
   if (LineIndex + 1 == Lines.size() &&
       Text.size() == Split.first + Split.second) {
     // For the last line we need to break before "*/", but not to add "* ".
     Prefix = "";
+    if (LocalIndentAtLineBreak >= 2)
+      LocalIndentAtLineBreak -= 2;
   }
-
-  unsigned BreakOffsetInToken =
-      Text.data() - Tok.TokenText.data() + Split.first;
-  unsigned CharsToRemove = Split.second;
-  assert(IndentAtLineBreak >= Decoration.size());
-  Whitespaces.replaceWhitespaceInToken(
-      Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1,
-      IndentLevel, IndentAtLineBreak - Decoration.size());
-}
-
-void BreakableBlockComment::replaceWhitespace(unsigned LineIndex,
-                                              unsigned TailOffset, Split Split,
-                                              WhitespaceManager &Whitespaces) {
-  StringRef Text = Lines[LineIndex].substr(TailOffset);
+  // The split offset is from the beginning of the line. Convert it to an offset
+  // from the beginning of the token text.
   unsigned BreakOffsetInToken =
-      Text.data() - Tok.TokenText.data() + Split.first;
+      Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
   unsigned CharsToRemove = Split.second;
+  assert(LocalIndentAtLineBreak >= Prefix.size());
   Whitespaces.replaceWhitespaceInToken(
-      Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false,
-      /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1);
+      tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", Prefix,
+      InPPDirective, /*Newlines=*/1, IndentLevel,
+      /*Spaces=*/LocalIndentAtLineBreak - Prefix.size());
+}
+
+BreakableToken::Split BreakableBlockComment::getSplitBefore(
+    unsigned LineIndex,
+    unsigned PreviousEndColumn,
+    unsigned ColumnLimit) const {
+  if (!mayReflow(LineIndex))
+    return Split(StringRef::npos, 0);
+  StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+  return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn,
+                        ColumnLimit);
+}
+
+unsigned BreakableBlockComment::getReflownColumn(
+    StringRef Content,
+    unsigned LineIndex,
+    unsigned PreviousEndColumn) const {
+    unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
+    // If this is the last line, it will carry around its '*/' postfix.
+    unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0);
+    // The line is composed of previous text, reflow prefix, reflown text and
+    // postfix.
+    unsigned ReflownColumn =
+        StartColumn + encoding::columnWidthWithTabs(Content, StartColumn,
+                                                    Style.TabWidth, Encoding) +
+        PostfixLength;
+    return ReflownColumn;
+}
+
+unsigned BreakableBlockComment::getLineLengthAfterSplitBefore(
+    unsigned LineIndex, unsigned TailOffset,
+    unsigned PreviousEndColumn,
+    unsigned ColumnLimit,
+    Split SplitBefore) const {
+    if (SplitBefore.first == StringRef::npos ||
+        SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
+      // A piece of line, not the whole, gets reflown.
+      return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+  } else {
+    // The whole line gets reflown, need to check if we need to insert a break
+    // for the postfix or not.
+    StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+    unsigned ReflownColumn =
+        getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
+    if (ReflownColumn <= ColumnLimit) {
+      return ReflownColumn;
+    }
+    return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+  }
 }
-
 void BreakableBlockComment::replaceWhitespaceBefore(
-    unsigned LineIndex, WhitespaceManager &Whitespaces) {
-  if (LineIndex == 0)
+    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+    Split SplitBefore, WhitespaceManager &Whitespaces) {
+  if (LineIndex == 0) return;
+  StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
+  if (SplitBefore.first != StringRef::npos) {
+    // Here we need to reflow.
+    assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
+           "Reflowing whitespace within a token");
+    // This is the offset of the end of the last line relative to the start of
+    // the token text in the token.
+    unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
+        Content[LineIndex - 1].size() -
+        tokenAt(LineIndex).TokenText.data();
+    unsigned WhitespaceLength = TrimmedContent.data() -
+        tokenAt(LineIndex).TokenText.data() -
+        WhitespaceOffsetInToken;
+    Whitespaces.replaceWhitespaceInToken(
+        tokenAt(LineIndex), WhitespaceOffsetInToken,
+        /*ReplaceChars=*/WhitespaceLength,
+        /*PreviousPostfix=*/"",
+        /*CurrentPrefix=*/ReflowPrefix, InPPDirective,
+        /*Newlines=*/0, IndentLevel, /*Spaces=*/0);
+    // Check if we need to also insert a break at the whitespace range.
+    // For this we first adapt the reflow split relative to the beginning of the
+    // content.
+    // Note that we don't need a penalty for this break, since it doesn't change
+    // the total number of lines.
+    Split BreakSplit = SplitBefore;
+    BreakSplit.first += TrimmedContent.data() - Content[LineIndex].data();
+    unsigned ReflownColumn =
+        getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn);
+    if (ReflownColumn > ColumnLimit) {
+      insertBreak(LineIndex, 0, BreakSplit, Whitespaces);
+    }
     return;
+  }
+
+  // Here no reflow with the previous line will happen.
+  // Fix the decoration of the line at LineIndex.
   StringRef Prefix = Decoration;
-  if (Lines[LineIndex].empty()) {
+  if (Content[LineIndex].empty()) {
     if (LineIndex + 1 == Lines.size()) {
       if (!LastLineNeedsDecoration) {
         // If the last line was empty, we don't need a prefix, as the */ will
@@ -418,19 +590,23 @@ void BreakableBlockComment::replaceWhite
       Prefix = Prefix.substr(0, 1);
     }
   } else {
-    if (StartOfLineColumn[LineIndex] == 1) {
+    if (ContentColumn[LineIndex] == 1) {
       // This line starts immediately after the decorating *.
       Prefix = Prefix.substr(0, 1);
     }
   }
-
-  unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() -
-                                     Tok.TokenText.data() -
-                                     LeadingWhitespace[LineIndex];
+  // This is the offset of the end of the last line relative to the start of the
+  // token text in the token.
+  unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
+                                     Content[LineIndex - 1].size() -
+                                     tokenAt(LineIndex).TokenText.data();
+  unsigned WhitespaceLength = Content[LineIndex].data() -
+                              tokenAt(LineIndex).TokenText.data() -
+                              WhitespaceOffsetInToken;
   Whitespaces.replaceWhitespaceInToken(
-      Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
-      InPPDirective, 1, IndentLevel,
-      StartOfLineColumn[LineIndex] - Prefix.size());
+      tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "",
+      Prefix, InPPDirective, /*Newlines=*/1, IndentLevel,
+      ContentColumn[LineIndex] - Prefix.size());
 }
 
 unsigned
@@ -439,7 +615,219 @@ BreakableBlockComment::getContentStartCo
   // If we break, we always break at the predefined indent.
   if (TailOffset != 0)
     return IndentAtLineBreak;
-  return std::max(0, StartOfLineColumn[LineIndex]);
+  return std::max(0, ContentColumn[LineIndex]);
+}
+
+BreakableLineCommentSection::BreakableLineCommentSection(
+    const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn,
+    unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
+    encoding::Encoding Encoding, const FormatStyle &Style)
+    : BreakableComment(Token, IndentLevel, StartColumn, OriginalStartColumn,
+                       FirstInLine, InPPDirective, Encoding, Style) {
+  assert(Tok.is(TT_LineComment) &&
+         "line comment section must start with a line comment");
+  FormatToken *LineTok = nullptr;
+  for (const FormatToken *CurrentTok = &Tok;
+       CurrentTok && CurrentTok->is(TT_LineComment);
+       CurrentTok = CurrentTok->Next) {
+    LastLineTok = LineTok;
+    StringRef TokenText(CurrentTok->TokenText);
+    assert(TokenText.startswith("//"));
+    size_t FirstLineIndex = Lines.size();
+    TokenText.split(Lines, "\n");
+    Content.resize(Lines.size());
+    ContentColumn.resize(Lines.size());
+    OriginalContentColumn.resize(Lines.size());
+    Tokens.resize(Lines.size());
+    Prefix.resize(Lines.size());
+    OriginalPrefix.resize(Lines.size());
+    for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
+      StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i]);
+      OriginalPrefix[i] = Prefix[i] = IndentPrefix;
+      if (Lines[i].size() > Prefix[i].size() &&
+          isAlphanumeric(Lines[i][Prefix[i].size()])) {
+        if (Prefix[i] == "//")
+          Prefix[i] = "// ";
+        else if (Prefix[i] == "///")
+          Prefix[i] = "/// ";
+        else if (Prefix[i] == "//!")
+          Prefix[i] = "//! ";
+      }
+
+      Tokens[i] = LineTok;
+      Content[i] = Lines[i].substr(IndentPrefix.size());
+      OriginalContentColumn[i] =
+          StartColumn +
+          encoding::columnWidthWithTabs(OriginalPrefix[i],
+                                        StartColumn,
+                                        Style.TabWidth,
+                                        Encoding);
+      ContentColumn[i] =
+          StartColumn +
+          encoding::columnWidthWithTabs(Prefix[i],
+                                        StartColumn,
+                                        Style.TabWidth,
+                                        Encoding);
+
+      // Calculate the end of the non-whitespace text in this line.
+      size_t EndOfLine = Content[i].find_last_not_of(Blanks);
+      if (EndOfLine == StringRef::npos)
+        EndOfLine = Content[i].size();
+      else
+        ++EndOfLine;
+      Content[i] = Content[i].substr(0, EndOfLine);
+    }
+    LineTok = CurrentTok->Next;
+  }
+}
+
+unsigned BreakableLineCommentSection::getLineLengthAfterSplit(
+    unsigned LineIndex, unsigned TailOffset,
+    StringRef::size_type Length) const {
+  unsigned ContentStartColumn =
+      (TailOffset == 0 ? ContentColumn[LineIndex]
+                       : OriginalContentColumn[LineIndex]);
+  return ContentStartColumn + encoding::columnWidthWithTabs(
+                                  Content[LineIndex].substr(TailOffset, Length),
+                                  ContentStartColumn, Style.TabWidth, Encoding);
+}
+
+void BreakableLineCommentSection::insertBreak(unsigned LineIndex,
+                                              unsigned TailOffset, Split Split,
+                                              WhitespaceManager &Whitespaces) {
+  StringRef Text = Content[LineIndex].substr(TailOffset);
+  // Compute the offset of the split relative to the beginning of the token
+  // text.
+  unsigned BreakOffsetInToken =
+      Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
+  unsigned CharsToRemove = Split.second;
+  // Compute the size of the new indent, including the size of the new prefix of
+  // the newly broken line.
+  unsigned IndentAtLineBreak = OriginalContentColumn[LineIndex] +
+                               Prefix[LineIndex].size() -
+                               OriginalPrefix[LineIndex].size();
+  assert(IndentAtLineBreak >= Prefix[LineIndex].size());
+  Whitespaces.replaceWhitespaceInToken(
+      tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
+      Prefix[LineIndex], InPPDirective, /*Newlines=*/1, IndentLevel,
+      /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size());
+}
+
+BreakableComment::Split BreakableLineCommentSection::getSplitBefore(
+    unsigned LineIndex,
+    unsigned PreviousEndColumn,
+    unsigned ColumnLimit) const {
+  if (!mayReflow(LineIndex)) return Split(StringRef::npos, 0);
+  return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn,
+                        ColumnLimit);
+}
+
+unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore(
+    unsigned LineIndex, unsigned TailOffset,
+    unsigned PreviousEndColumn,
+    unsigned ColumnLimit,
+    Split SplitBefore) const {
+  if (SplitBefore.first == StringRef::npos ||
+      SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
+    // A piece of line, not the whole line, gets reflown.
+    return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+  } else {
+    // The whole line gets reflown.
+    unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size();
+    return StartColumn + encoding::columnWidthWithTabs(Content[LineIndex],
+                                                       StartColumn,
+                                                       Style.TabWidth,
+                                                       Encoding);
+  }
+}
+
+void BreakableLineCommentSection::replaceWhitespaceBefore(
+    unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit,
+    Split SplitBefore, WhitespaceManager &Whitespaces) {
+  // If this is the first line of a token, we need to inform Whitespace Manager
+  // about it: either adapt the whitespace range preceding it, or mark it as an
+  // untouchable token.
+  // This happens for instance here:
+  // // line 1 \
+  // // line 2
+  if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
+    if (SplitBefore.first != StringRef::npos) {
+      // Reflow happens between tokens. Replace the whitespace between the
+      // tokens by the empty string.
+      Whitespaces.replaceWhitespace(*Tokens[LineIndex],
+                                    /*Newlines=*/0,
+                                    /*IndentLevel=*/IndentLevel,
+                                    /*Spaces=*/0,
+                                    /*StartOfTokenColumn=*/StartColumn,
+                                    /*InPPDirective=*/false);
+      // Replace the indent and prefix of the token with the reflow prefix.
+      unsigned WhitespaceLength =
+          Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
+      Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex],
+                                           /*Offset=*/0,
+                                           /*ReplaceChars=*/WhitespaceLength,
+                                           /*PreviousPostfix=*/"",
+                                           /*CurrentPrefix=*/ReflowPrefix,
+                                           /*InPPDirective=*/false,
+                                           /*Newlines=*/0,
+                                           /*IndentLevel=*/IndentLevel,
+                                           /*Spaces=*/0);
+    } else {
+      // This is the first line for the current token, but no reflow with the
+      // previous token is necessary. However, we still may need to adjust the
+      // start column.
+      unsigned LineColumn =
+          ContentColumn[LineIndex] -
+          (Content[LineIndex].data() - Lines[LineIndex].data());
+      if (tokenAt(LineIndex).OriginalColumn != LineColumn) {
+        Whitespaces.replaceWhitespace(*Tokens[LineIndex],
+                                      /*Newlines=*/1,
+                                      /*IndentLevel=*/IndentLevel,
+                                      /*Spaces=*/LineColumn,
+                                      /*StartOfTokenColumn=*/LineColumn,
+                                      /*InPPDirective=*/false);
+      } else {
+        // The whitespace preceding the first line of this token does not need
+        // to be touched.
+        Whitespaces.addUntouchableToken(tokenAt(LineIndex),
+                                        /*InPPDirective=*/false);
+      }
+    }
+  } else if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
+    // This is not the first line of the token. Adjust the prefix if necessary.
+
+    // Take care of the space possibly introduced after a decoration.
+    assert(Prefix[LineIndex] == (OriginalPrefix[LineIndex] + " ").str() &&
+           "Expecting a block comment decoration to differ from original by "
+           "at most a space");
+    Whitespaces.replaceWhitespaceInToken(
+        tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "",
+        /*InPPDirective=*/false,
+        /*Newlines=*/0, /*IndentLevel=*/0,
+        /*Spaces=*/1);
+  }
+  // Add a break after a reflow split has been introduced, if necessary.
+  // Note that this break doesn't need to be penalized, since it doesn't change
+  // the number of lines.
+  if (SplitBefore.first != StringRef::npos &&
+      SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) {
+    insertBreak(LineIndex, 0, SplitBefore, Whitespaces);
+  }
+}
+
+void BreakableLineCommentSection::updateNextToken(LineState& State) const {
+  if (LastLineTok) {
+    State.NextToken = LastLineTok->Next;
+  }
+}
+
+unsigned
+BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
+                                                   unsigned TailOffset) const {
+  if (TailOffset != 0) {
+    return OriginalContentColumn[LineIndex];
+  }
+  return ContentColumn[LineIndex];
 }
 
 } // namespace format

Modified: cfe/trunk/lib/Format/BreakableToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.h?rev=293055&r1=293054&r2=293055&view=diff
==============================================================================
--- cfe/trunk/lib/Format/BreakableToken.h (original)
+++ cfe/trunk/lib/Format/BreakableToken.h Wed Jan 25 07:58:58 2017
@@ -8,9 +8,10 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// \brief Declares BreakableToken, BreakableStringLiteral, and
-/// BreakableBlockComment classes, that contain token type-specific logic to
-/// break long lines in tokens.
+/// \brief Declares BreakableToken, BreakableStringLiteral, BreakableComment,
+/// BreakableBlockComment and BreakableLineCommentSection classes, that contain
+/// token type-specific logic to break long lines in tokens and reflow content
+/// between tokens.
 ///
 //===----------------------------------------------------------------------===//
 
@@ -25,10 +26,43 @@
 namespace clang {
 namespace format {
 
+/// \brief Checks if \p Token switches formatting, like /* clang-format off */.
+/// \p Token must be a comment.
+bool switchesFormatting(const FormatToken &Token);
+
 struct FormatStyle;
 
 /// \brief Base class for strategies on how to break tokens.
 ///
+/// This is organised around the concept of a \c Split, which is a whitespace
+/// range that signifies a position of the content of a token where a
+/// reformatting might be done. Operating with splits is divided into 3
+/// operations:
+/// - getSplit, for finding a split starting at a position,
+/// - getLineLengthAfterSplit, for calculating the size in columns of the rest
+///   of the content after a split has been used for breaking, and
+/// - insertBreak, for executing the split using a whitespace manager.
+///
+/// There is a pair of operations that are used to compress a long whitespace
+/// range with a single space if that will bring the line lenght under the
+/// column limit:
+/// - getLineLengthAfterCompression, for calculating the size in columns of the
+///   line after a whitespace range has been compressed, and
+/// - compressWhitespace, for executing the whitespace compression using a
+///   whitespace manager; note that the compressed whitespace may be in the
+///   middle of the original line and of the reformatted line.
+///
+/// For tokens where the whitespace before each line needs to be also
+/// reformatted, for example for tokens supporting reflow, there are analogous
+/// operations that might be executed before the main line breaking occurs:
+/// - getSplitBefore, for finding a split such that the content preceding it
+///   needs to be specially reflown,
+/// - getLineLengthAfterSplitBefore, for calculating the line length in columns
+///   of the remainder of the content after the beginning of the content has
+///   been reformatted, and
+/// - replaceWhitespaceBefore, for executing the reflow using a whitespace
+///   manager.
+///
 /// FIXME: The interface seems set in stone, so we might want to just pull the
 /// strategy into the class, instead of controlling it from the outside.
 class BreakableToken {
@@ -42,13 +76,13 @@ public:
   virtual unsigned getLineCount() const = 0;
 
   /// \brief Returns the number of columns required to format the piece of line
-  /// at \p LineIndex, from byte offset \p Offset with length \p Length.
+  /// at \p LineIndex, from byte offset \p TailOffset with length \p Length.
   ///
-  /// Note that previous breaks are not taken into account. \p Offset is always
-  /// specified from the start of the (original) line.
+  /// Note that previous breaks are not taken into account. \p TailOffset is
+  /// always specified from the start of the (original) line.
   /// \p Length can be set to StringRef::npos, which means "to the end of line".
   virtual unsigned
-  getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
+  getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
                           StringRef::size_type Length) const = 0;
 
   /// \brief Returns a range (offset, length) at which to break the line at
@@ -61,16 +95,59 @@ public:
   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
                            WhitespaceManager &Whitespaces) = 0;
 
+  /// \brief Returns the number of columns required to format the piece of line
+  /// at \p LineIndex, from byte offset \p TailOffset after the whitespace range
+  /// \p Split has been compressed into a single space.
+  unsigned getLineLengthAfterCompression(unsigned RemainingTokenColumns,
+                                         Split Split) const;
+
   /// \brief Replaces the whitespace range described by \p Split with a single
   /// space.
-  virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
-                                 Split Split,
-                                 WhitespaceManager &Whitespaces) = 0;
+  virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset,
+                                  Split Split,
+                                  WhitespaceManager &Whitespaces) = 0;
+
+  /// \brief Returns a whitespace range (offset, length) of the content at
+  /// \p LineIndex such that the content preceding this range needs to be
+  /// reformatted before any breaks are made to this line.
+  ///
+  /// \p PreviousEndColumn is the end column of the previous line after
+  /// formatting.
+  ///
+  /// A result having offset == StringRef::npos means that no piece of the line
+  /// needs to be reformatted before any breaks are made.
+  virtual Split getSplitBefore(unsigned LineIndex,
+                               unsigned PreviousEndColumn,
+                               unsigned ColumnLimit) const {
+    return Split(StringRef::npos, 0);
+  }
+
+  /// \brief Returns the number of columns required to format the piece of line
+  /// at \p LineIndex after the content preceding the whitespace range specified
+  /// \p SplitBefore has been reformatted, but before any breaks are made to
+  /// this line.
+  virtual unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
+                                         unsigned TailOffset,
+                                         unsigned PreviousEndColumn,
+                                         unsigned ColumnLimit,
+                                         Split SplitBefore) const {
+    return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+  }
 
   /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
+  /// Performs a reformatting of the content at \p LineIndex preceding the
+  /// whitespace range \p SplitBefore.
   virtual void replaceWhitespaceBefore(unsigned LineIndex,
+                                       unsigned PreviousEndColumn,
+                                       unsigned ColumnLimit,
+                                       Split SplitBefore,
                                        WhitespaceManager &Whitespaces) {}
 
+  /// \brief Updates the next token of \p State to the next token after this
+  /// one. This can be used when this token manages a set of underlying tokens
+  /// as a unit and is responsible for the formatting of the them.
+  virtual void updateNextToken(LineState &State) const {}
+
 protected:
   BreakableToken(const FormatToken &Tok, unsigned IndentLevel,
                  bool InPPDirective, encoding::Encoding Encoding,
@@ -126,98 +203,143 @@ public:
                  unsigned ColumnLimit) const override;
   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
                    WhitespaceManager &Whitespaces) override;
-  void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
-                         WhitespaceManager &Whitespaces) override {}
+  void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
+                          WhitespaceManager &Whitespaces) override {}
 };
 
-class BreakableLineComment : public BreakableSingleLineToken {
-public:
-  /// \brief Creates a breakable token for a line comment.
+class BreakableComment : public BreakableToken {
+protected:
+  /// \brief Creates a breakable token for a comment.
   ///
   /// \p StartColumn specifies the column in which the comment will start
-  /// after formatting.
-  BreakableLineComment(const FormatToken &Token, unsigned IndentLevel,
-                       unsigned StartColumn, bool InPPDirective,
-                       encoding::Encoding Encoding, const FormatStyle &Style);
+  /// after formatting, while \p OriginalStartColumn specifies in which
+  /// column the comment started before formatting.
+  /// If the comment starts a line after formatting, set \p FirstInLine to true.
+  BreakableComment(const FormatToken &Token, unsigned IndentLevel,
+                   unsigned StartColumn, unsigned OriginalStartColumn,
+                   bool FirstInLine, bool InPPDirective,
+                   encoding::Encoding Encoding, const FormatStyle &Style);
 
+public:
+  unsigned getLineCount() const override;
   Split getSplit(unsigned LineIndex, unsigned TailOffset,
                  unsigned ColumnLimit) const override;
-  void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                   WhitespaceManager &Whitespaces) override;
-  void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
-                         WhitespaceManager &Whitespaces) override;
-  void replaceWhitespaceBefore(unsigned LineIndex,
-                               WhitespaceManager &Whitespaces) override;
+  void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
+                          WhitespaceManager &Whitespaces) override;
 
-private:
-  // The prefix without an additional space if one was added.
-  StringRef OriginalPrefix;
+protected:
+  virtual unsigned getContentStartColumn(unsigned LineIndex,
+                                         unsigned TailOffset) const = 0;
+
+  // Returns a split that divides Text into a left and right parts, such that
+  // the left part is suitable for reflowing after PreviousEndColumn.
+  Split getReflowSplit(StringRef Text, StringRef ReflowPrefix,
+                       unsigned PreviousEndColumn, unsigned ColumnLimit) const;
+
+  // Returns the token containing the line at LineIndex.
+  const FormatToken &tokenAt(unsigned LineIndex) const;
+
+  // Checks if the content of line LineIndex may be reflown with the previous
+  // line.
+  bool mayReflow(unsigned LineIndex) const;
+
+  // Contains the original text of the lines of the block comment.
+  //
+  // In case of a block comments, excludes the leading /* in the first line and
+  // trailing */ in the last line. In case of line comments, excludes the
+  // leading // and spaces.
+  SmallVector<StringRef, 16> Lines;
+
+  // Contains the text of the lines excluding all leading and trailing
+  // whitespace between the lines. Note that the decoration (if present) is also
+  // not considered part of the text.
+  SmallVector<StringRef, 16> Content;
+
+  // Tokens[i] contains a reference to the token containing Lines[i] if the
+  // whitespace range before that token is managed by this block.
+  // Otherwise, Tokens[i] is a null pointer.
+  SmallVector<FormatToken *, 16> Tokens;
+
+  // ContentColumn[i] is the target column at which Content[i] should be.
+  // Note that this excludes a leading "* " or "*" in case of block comments
+  // where all lines have a "*" prefix, or the leading "// " or "//" in case of
+  // line comments.
+  //
+  // In block comments, the first line's target column is always positive. The
+  // remaining lines' target columns are relative to the first line to allow
+  // correct indentation of comments in \c WhitespaceManager. Thus they can be
+  // negative as well (in case the first line needs to be unindented more than
+  // there's actual whitespace in another line).
+  SmallVector<int, 16> ContentColumn;
+
+  // The intended start column of the first line of text from this section.
+  unsigned StartColumn;
+
+  // The original start column of the first line of text from this section.
+  unsigned OriginalStartColumn;
+
+  // Whether the first token of this section is the first token in its unwrapped
+  // line.
+  bool FirstInLine;
+
+  // In case of line comments, holds the original prefix, including trailing
+  // whitespace.
+  SmallVector<StringRef, 16> OriginalPrefix;
+
+  // The prefix to use in front a line that has been reflown up.
+  // For example, when reflowing the second line after the first here:
+  // // comment 1
+  // // comment 2
+  // we expect:
+  // // comment 1 comment 2
+  // and not:
+  // // comment 1comment 2
+  StringRef ReflowPrefix = " ";
 };
 
-class BreakableBlockComment : public BreakableToken {
+class BreakableBlockComment : public BreakableComment {
 public:
-  /// \brief Creates a breakable token for a block comment.
-  ///
-  /// \p StartColumn specifies the column in which the comment will start
-  /// after formatting, while \p OriginalStartColumn specifies in which
-  /// column the comment started before formatting.
-  /// If the comment starts a line after formatting, set \p FirstInLine to true.
   BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel,
-                        unsigned StartColumn, unsigned OriginaStartColumn,
+                        unsigned StartColumn, unsigned OriginalStartColumn,
                         bool FirstInLine, bool InPPDirective,
                         encoding::Encoding Encoding, const FormatStyle &Style);
 
-  unsigned getLineCount() const override;
-  unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
+  unsigned getLineLengthAfterSplit(unsigned LineIndex,
+                                   unsigned TailOffset,
                                    StringRef::size_type Length) const override;
-  Split getSplit(unsigned LineIndex, unsigned TailOffset,
-                 unsigned ColumnLimit) const override;
   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
                    WhitespaceManager &Whitespaces) override;
-  void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
-                         WhitespaceManager &Whitespaces) override;
-  void replaceWhitespaceBefore(unsigned LineIndex,
+  Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+                       unsigned ColumnLimit) const override;
+  unsigned getLineLengthAfterSplitBefore(unsigned LineIndex,
+                                         unsigned TailOffset,
+                                         unsigned PreviousEndColumn,
+                                         unsigned ColumnLimit,
+                                         Split SplitBefore) const override;
+  void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+                               unsigned ColumnLimit,
+                               Split SplitBefore,
                                WhitespaceManager &Whitespaces) override;
 
 private:
-  // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
-  // so that all whitespace between the lines is accounted to Lines[LineIndex]
-  // as leading whitespace:
-  // - Lines[LineIndex] points to the text after that whitespace
-  // - Lines[LineIndex-1] shrinks by its trailing whitespace
-  // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
-  //   between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
+  // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex].
+  //
+  // Updates Content[LineIndex-1] and Content[LineIndex] by stripping off
+  // leading and trailing whitespace.
   //
-  // Sets StartOfLineColumn to the intended column in which the text at
+  // Sets ContentColumn to the intended column in which the text at
   // Lines[LineIndex] starts (note that the decoration, if present, is not
   // considered part of the text).
   void adjustWhitespace(unsigned LineIndex, int IndentDelta);
 
-  // Returns the column at which the text in line LineIndex starts, when broken
-  // at TailOffset. Note that the decoration (if present) is not considered part
-  // of the text.
-  unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
-
-  // Contains the text of the lines of the block comment, excluding the leading
-  // /* in the first line and trailing */ in the last line, and excluding all
-  // trailing whitespace between the lines. Note that the decoration (if
-  // present) is also not considered part of the text.
-  SmallVector<StringRef, 16> Lines;
+  // Computes the end column if the full Content from LineIndex gets reflown
+  // after PreviousEndColumn.
+  unsigned getReflownColumn(StringRef Content,
+                            unsigned LineIndex,
+                            unsigned PreviousEndColumn) const;
 
-  // LeadingWhitespace[i] is the number of characters regarded as whitespace in
-  // front of Lines[i]. Note that this can include "* " sequences, which we
-  // regard as whitespace when all lines have a "*" prefix.
-  SmallVector<unsigned, 16> LeadingWhitespace;
-
-  // StartOfLineColumn[i] is the target column at which Line[i] should be.
-  // Note that this excludes a leading "* " or "*" in case all lines have
-  // a "*" prefix.
-  // The first line's target column is always positive. The remaining lines'
-  // target columns are relative to the first line to allow correct indentation
-  // of comments in \c WhitespaceManager. Thus they can be negative as well (in
-  // case the first line needs to be unindented more than there's actual
-  // whitespace in another line).
-  SmallVector<int, 16> StartOfLineColumn;
+  unsigned getContentStartColumn(unsigned LineIndex,
+                                 unsigned TailOffset) const override;
 
   // The column at which the text of a broken line should start.
   // Note that an optional decoration would go before that column.
@@ -239,6 +361,53 @@ private:
   StringRef Decoration;
 };
 
+class BreakableLineCommentSection : public BreakableComment {
+public:
+  BreakableLineCommentSection(const FormatToken &Token, unsigned IndentLevel,
+                              unsigned StartColumn,
+                              unsigned OriginalStartColumn, bool FirstInLine,
+                              bool InPPDirective, encoding::Encoding Encoding,
+                              const FormatStyle &Style);
+
+  unsigned getLineLengthAfterSplit(unsigned LineIndex,
+                                   unsigned TailOffset,
+                                   StringRef::size_type Length) const override;
+  void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                   WhitespaceManager &Whitespaces) override;
+  Split getSplitBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+                       unsigned ColumnLimit) const override;
+  unsigned getLineLengthAfterSplitBefore(unsigned LineIndex, unsigned TailOffset,
+                                         unsigned PreviousEndColumn,
+                                         unsigned ColumnLimit,
+                                         Split SplitBefore) const override;
+  void replaceWhitespaceBefore(unsigned LineIndex, unsigned PreviousEndColumn,
+                               unsigned ColumnLimit, Split SplitBefore,
+                               WhitespaceManager &Whitespaces) override;
+  void updateNextToken(LineState& State) const override;
+
+private:
+  unsigned getContentStartColumn(unsigned LineIndex,
+                                 unsigned TailOffset) const override;
+
+  // Prefix[i] contains the intended leading "//" with trailing spaces to
+  // account for the indentation of content within the comment at line i after
+  // formatting. It can be different than the original prefix when the original
+  // line starts like this:
+  // //content
+  // Then the original prefix is "//", but the prefix is "// ".
+  SmallVector<StringRef, 16> Prefix;
+
+  SmallVector<unsigned, 16> OriginalContentColumn;
+
+  /// \brief The token to which the last line of this breakable token belongs
+  /// to; nullptr if that token is the initial token.
+  ///
+  /// The distinction is because if the token of the last line of this breakable
+  /// token is distinct from the initial token, this breakable token owns the
+  /// whitespace before the token of the last line, and the whitespace manager
+  /// must be able to modify it.
+  FormatToken *LastLineTok = nullptr;
+};
 } // namespace format
 } // namespace clang
 

Modified: cfe/trunk/lib/Format/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/CMakeLists.txt?rev=293055&r1=293054&r2=293055&view=diff
==============================================================================
--- cfe/trunk/lib/Format/CMakeLists.txt (original)
+++ cfe/trunk/lib/Format/CMakeLists.txt Wed Jan 25 07:58:58 2017
@@ -3,7 +3,6 @@ set(LLVM_LINK_COMPONENTS support)
 add_clang_library(clangFormat
   AffectedRangeManager.cpp
   BreakableToken.cpp
-  Comments.cpp
   ContinuationIndenter.cpp
   Format.cpp
   FormatToken.cpp

Removed: cfe/trunk/lib/Format/Comments.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Comments.cpp?rev=293054&view=auto
==============================================================================
--- cfe/trunk/lib/Format/Comments.cpp (original)
+++ cfe/trunk/lib/Format/Comments.cpp (removed)
@@ -1,36 +0,0 @@
-//===--- Comments.cpp - Comment Manipulation  -------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief Implements comment manipulation.
-///
-//===----------------------------------------------------------------------===//
-
-#include "Comments.h"
-
-namespace clang {
-namespace format {
-
-StringRef getLineCommentIndentPrefix(StringRef Comment) {
-  static const char *const KnownPrefixes[] = {"///", "//", "//!"};
-  StringRef LongestPrefix;
-  for (StringRef KnownPrefix : KnownPrefixes) {
-    if (Comment.startswith(KnownPrefix)) {
-      size_t PrefixLength = KnownPrefix.size();
-      while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ')
-        ++PrefixLength;
-      if (PrefixLength > LongestPrefix.size())
-        LongestPrefix = Comment.substr(0, PrefixLength);
-    }
-  }
-  return LongestPrefix;
-}
-
-} // namespace format
-} // namespace clang

Removed: cfe/trunk/lib/Format/Comments.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Comments.h?rev=293054&view=auto
==============================================================================
--- cfe/trunk/lib/Format/Comments.h (original)
+++ cfe/trunk/lib/Format/Comments.h (removed)
@@ -1,33 +0,0 @@
-//===--- Comments.cpp - Comment manipulation  -----------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// \brief Declares comment manipulation functionality.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_LIB_FORMAT_COMMENTS_H
-#define LLVM_CLANG_LIB_FORMAT_COMMENTS_H
-
-#include "clang/Basic/LLVM.h"
-#include "llvm/ADT/StringRef.h"
-
-namespace clang {
-namespace format {
-
-/// \brief Returns the comment prefix of the line comment \p Comment.
-///
-/// The comment prefix consists of a leading known prefix, like "//" or "///",
-/// together with the following whitespace.
-StringRef getLineCommentIndentPrefix(StringRef Comment);
-
-} // namespace format
-} // namespace clang
-
-#endif

Modified: cfe/trunk/lib/Format/ContinuationIndenter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/ContinuationIndenter.cpp?rev=293055&r1=293054&r2=293055&view=diff
==============================================================================
--- cfe/trunk/lib/Format/ContinuationIndenter.cpp (original)
+++ cfe/trunk/lib/Format/ContinuationIndenter.cpp Wed Jan 25 07:58:58 2017
@@ -20,7 +20,7 @@
 #include "clang/Format/Format.h"
 #include "llvm/Support/Debug.h"
 
-#define DEBUG_TYPE "format-formatter"
+#define DEBUG_TYPE "format-indenter"
 
 namespace clang {
 namespace format {
@@ -1154,7 +1154,11 @@ unsigned ContinuationIndenter::breakProt
     }
   } else if (Current.is(TT_BlockComment)) {
     if (!Current.isTrailingComment() || !Style.ReflowComments ||
-        CommentPragmasRegex.match(Current.TokenText.substr(2)))
+        CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
+        // If a comment token switches formatting, like
+        // /* clang-format on */, we don't want to break it further,
+        // but we may still want to adjust its indentation.
+        switchesFormatting(Current))
       return addMultilineToken(Current, State);
     Token.reset(new BreakableBlockComment(
         Current, State.Line->Level, StartColumn, Current.OriginalColumn,
@@ -1163,11 +1167,13 @@ unsigned ContinuationIndenter::breakProt
              (Current.Previous == nullptr ||
               Current.Previous->isNot(TT_ImplicitStringLiteral))) {
     if (!Style.ReflowComments ||
-        CommentPragmasRegex.match(Current.TokenText.substr(2)))
+        CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
+        switchesFormatting(Current))
       return 0;
-    Token.reset(new BreakableLineComment(Current, State.Line->Level,
-                                         StartColumn, /*InPPDirective=*/false,
-                                         Encoding, Style));
+    Token.reset(new BreakableLineCommentSection(
+        Current, State.Line->Level, StartColumn, Current.OriginalColumn,
+        !Current.Previous,
+        /*InPPDirective=*/false, Encoding, Style));
     // We don't insert backslashes when breaking line comments.
     ColumnLimit = Style.ColumnLimit;
   } else {
@@ -1178,15 +1184,27 @@ unsigned ContinuationIndenter::breakProt
 
   unsigned RemainingSpace = ColumnLimit - Current.UnbreakableTailLength;
   bool BreakInserted = false;
+  // We use a conservative reflowing strategy. Reflow starts after a line is
+  // broken or the corresponding whitespace compressed. Reflow ends as soon as a
+  // line that doesn't get reflown with the previous line is reached.
+  bool ReflowInProgress = false;
   unsigned Penalty = 0;
   unsigned RemainingTokenColumns = 0;
   for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
        LineIndex != EndIndex; ++LineIndex) {
+    BreakableToken::Split SplitBefore(StringRef::npos, 0);
+    if (ReflowInProgress) {
+      SplitBefore = Token->getSplitBefore(LineIndex, RemainingTokenColumns,
+                                          RemainingSpace);
+    }
+    ReflowInProgress = SplitBefore.first != StringRef::npos;
+    unsigned TailOffset =
+        ReflowInProgress ? (SplitBefore.first + SplitBefore.second) : 0;
     if (!DryRun)
-      Token->replaceWhitespaceBefore(LineIndex, Whitespaces);
-    unsigned TailOffset = 0;
-    RemainingTokenColumns =
-        Token->getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos);
+      Token->replaceWhitespaceBefore(LineIndex, RemainingTokenColumns,
+                                     RemainingSpace, SplitBefore, Whitespaces);
+    RemainingTokenColumns = Token->getLineLengthAfterSplitBefore(
+        LineIndex, TailOffset, RemainingTokenColumns, ColumnLimit, SplitBefore);
     while (RemainingTokenColumns > RemainingSpace) {
       BreakableToken::Split Split =
           Token->getSplit(LineIndex, TailOffset, ColumnLimit);
@@ -1198,17 +1216,23 @@ unsigned ContinuationIndenter::breakProt
         break;
       }
       assert(Split.first != 0);
-      unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
-          LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
 
-      // We can remove extra whitespace instead of breaking the line.
-      if (RemainingTokenColumns + 1 - Split.second <= RemainingSpace) {
-        RemainingTokenColumns = 0;
+      // Check if compressing the whitespace range will bring the line length
+      // under the limit. If that is the case, we perform whitespace compression
+      // instead of inserting a line break.
+      unsigned RemainingTokenColumnsAfterCompression =
+          Token->getLineLengthAfterCompression(RemainingTokenColumns, Split);
+      if (RemainingTokenColumnsAfterCompression <= RemainingSpace) {
+        RemainingTokenColumns = RemainingTokenColumnsAfterCompression;
+        ReflowInProgress = true;
         if (!DryRun)
-          Token->replaceWhitespace(LineIndex, TailOffset, Split, Whitespaces);
+          Token->compressWhitespace(LineIndex, TailOffset, Split, Whitespaces);
         break;
       }
 
+      unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit(
+          LineIndex, TailOffset + Split.first + Split.second, StringRef::npos);
+
       // When breaking before a tab character, it may be moved by a few columns,
       // but will still be expanded to the next tab stop, so we don't save any
       // columns.
@@ -1226,6 +1250,7 @@ unsigned ContinuationIndenter::breakProt
       }
       TailOffset += Split.first + Split.second;
       RemainingTokenColumns = NewRemainingTokenColumns;
+      ReflowInProgress = true;
       BreakInserted = true;
     }
   }
@@ -1246,6 +1271,9 @@ unsigned ContinuationIndenter::breakProt
 
     State.Stack.back().LastSpace = StartColumn;
   }
+
+  Token->updateNextToken(State);
+
   return Penalty;
 }
 

Modified: cfe/trunk/lib/Format/TokenAnnotator.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=293055&r1=293054&r2=293055&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.cpp (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.cpp Wed Jan 25 07:58:58 2017
@@ -1594,8 +1594,14 @@ void TokenAnnotator::setCommentLineLevel
   for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
                                                           E = Lines.rend();
        I != E; ++I) {
-    if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
-        (*I)->First->Next == nullptr)
+    bool CommentLine = (*I)->First;
+    for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) {
+      if (!Tok->is(tok::comment)) {
+        CommentLine = false;
+        break;
+      }
+    }
+    if (NextNonCommentLine && CommentLine)
       (*I)->Level = NextNonCommentLine->Level;
     else
       NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=293055&r1=293054&r2=293055&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Wed Jan 25 07:58:58 2017
@@ -1998,7 +1998,9 @@ LLVM_ATTRIBUTE_UNUSED static void printD
   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
                                                     E = Line.Tokens.end();
        I != E; ++I) {
-    llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
+    llvm::dbgs() << I->Tok->Tok.getName() << "["
+                 << "T=" << I->Tok->Type
+                 << ", OC=" << I->Tok->OriginalColumn << "] ";
   }
   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
                                                     E = Line.Tokens.end();
@@ -2038,13 +2040,60 @@ bool UnwrappedLineParser::isOnNewLine(co
          FormatTok.NewlinesBefore > 0;
 }
 
+static bool isLineComment(const FormatToken &FormatTok) {
+  return FormatTok.is(tok::comment) &&
+         FormatTok.TokenText.startswith("//");
+}
+
+// Checks if \p FormatTok is a line comment that continues the line comment
+// section on \p Line.
+static bool continuesLineComment(const FormatToken &FormatTok,
+                                 const UnwrappedLine &Line) {
+  if (Line.Tokens.empty())
+    return false;
+  const FormatToken &FirstLineTok = *Line.Tokens.front().Tok;
+  // If Line starts with a line comment, then FormatTok continues the comment
+  // section if its original column is greater or equal to the  original start
+  // column of the line.
+  //
+  // If Line starts with a a different token, then FormatTok continues the
+  // comment section if its original column greater than the original start
+  // column of the line.
+  //
+  // For example, the second line comment continues the first in these cases:
+  // // first line
+  // // second line
+  // and:
+  // // first line
+  //  // second line
+  // and:
+  // int i; // first line
+  //  // second line
+  //
+  // The second line comment doesn't continue the first in these cases:
+  //   // first line
+  //  // second line
+  // and:
+  // int i; // first line
+  // // second line
+  unsigned MinContinueColumn =
+      FirstLineTok.OriginalColumn +
+      ((isLineComment(FirstLineTok) && !FirstLineTok.Next) ? 0 : 1);
+  return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
+         isLineComment(*(Line.Tokens.back().Tok)) &&
+         FormatTok.OriginalColumn >= MinContinueColumn;
+}
+
 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
   bool JustComments = Line->Tokens.empty();
   for (SmallVectorImpl<FormatToken *>::const_iterator
            I = CommentsBeforeNextToken.begin(),
            E = CommentsBeforeNextToken.end();
        I != E; ++I) {
-    if (isOnNewLine(**I) && JustComments)
+    // Line comments that belong to the same line comment section are put on the
+    // same line since later we might want to reflow content between them.
+    // See BreakableToken.
+    if (isOnNewLine(**I) && JustComments && !continuesLineComment(**I, *Line))
       addUnwrappedLine();
     pushToken(*I);
   }
@@ -2110,7 +2159,8 @@ void UnwrappedLineParser::readToken() {
 
     if (!FormatTok->Tok.is(tok::comment))
       return;
-    if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
+    if (!continuesLineComment(*FormatTok, *Line) &&
+        (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
       CommentsInCurrentLine = false;
     }
     if (CommentsInCurrentLine) {

Modified: cfe/trunk/lib/Format/WhitespaceManager.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/WhitespaceManager.cpp?rev=293055&r1=293054&r2=293055&view=diff
==============================================================================
--- cfe/trunk/lib/Format/WhitespaceManager.cpp (original)
+++ cfe/trunk/lib/Format/WhitespaceManager.cpp Wed Jan 25 07:58:58 2017
@@ -127,7 +127,38 @@ void WhitespaceManager::calculateLineBre
     Changes[i - 1].IsTrailingComment =
         (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof ||
          (Changes[i].IsInsideToken && Changes[i].Kind == tok::comment)) &&
-        Changes[i - 1].Kind == tok::comment;
+        Changes[i - 1].Kind == tok::comment &&
+        // FIXME: This is a dirty hack. The problem is that
+        // BreakableLineCommentSection does comment reflow changes and here is
+        // the aligning of trailing comments. Consider the case where we reflow
+        // the second line up in this example:
+        // 
+        // // line 1
+        // // line 2
+        // 
+        // That amounts to 2 changes by BreakableLineCommentSection:
+        //  - the first, delimited by (), for the whitespace between the tokens,
+        //  - and second, delimited by [], for the whitespace at the beginning
+        //  of the second token:
+        // 
+        // // line 1(
+        // )[// ]line 2
+        //
+        // So in the end we have two changes like this:
+        //
+        // // line1()[ ]line 2
+        //
+        // Note that the OriginalWhitespaceStart of the second change is the
+        // same as the PreviousOriginalWhitespaceEnd of the first change.
+        // In this case, the below check ensures that the second change doesn't
+        // get treated as a trailing comment change here, since this might
+        // trigger additional whitespace to be wrongly inserted before "line 2"
+        // by the comment aligner here.
+        //
+        // For a proper solution we need a mechanism to say to WhitespaceManager
+        // that a particular change breaks the current sequence of trailing
+        // comments.
+        OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd;
   }
   // FIXME: The last token is currently not always an eof token; in those
   // cases, setting TokenLength of the last token to 0 is wrong.

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=293055&r1=293054&r2=293055&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Wed Jan 25 07:58:58 2017
@@ -1783,6 +1783,463 @@ TEST_F(FormatTest, CommentsInStaticIniti
                "    0x00, 0x00, 0x00, 0x00};            // comment\n");
 }
 
+TEST_F(FormatTest, ReflowsComments) {
+  // Break a long line and reflow with the full next line.
+  EXPECT_EQ("// long long long\n"
+            "// long long",
+            format("// long long long long\n"
+                   "// long",
+                   getLLVMStyleWithColumns(20)));
+
+  // Keep the trailing newline while reflowing.
+  EXPECT_EQ("// long long long\n"
+            "// long long\n",
+            format("// long long long long\n"
+                   "// long\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Break a long line and reflow with a part of the next line.
+  EXPECT_EQ("// long long long\n"
+            "// long long\n"
+            "// long_long",
+            format("// long long long long\n"
+                   "// long long_long",
+                   getLLVMStyleWithColumns(20)));
+
+  // Break but do not reflow if the first word from the next line is too long.
+  EXPECT_EQ("// long long long\n"
+            "// long\n"
+            "// long_long_long\n",
+            format("// long long long long\n"
+                   "// long_long_long\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't break or reflow short lines.
+  verifyFormat("// long\n"
+               "// long long long lo\n"
+               "// long long long lo\n"
+               "// long",
+               getLLVMStyleWithColumns(20));
+
+  // Keep prefixes and decorations while reflowing.
+  EXPECT_EQ("/// long long long\n"
+            "/// long long\n",
+            format("/// long long long long\n"
+                   "/// long\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! long long long\n"
+            "//! long long\n",
+            format("//! long long long long\n"
+                   "//! long\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+            " * long long */",
+            format("/* long long long long\n"
+                   " * long */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't bring leading whitespace up while reflowing.
+  EXPECT_EQ("/*  long long long\n"
+            " * long long long\n"
+            " */",
+            format("/*  long long long long\n"
+                   " *  long long\n"
+                   " */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow the last line of a block comment with its trailing '*/'.
+  EXPECT_EQ("/* long long long\n"
+            "   long long */",
+            format("/* long long long long\n"
+                   "   long */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow two short lines; keep the postfix of the last one.
+  EXPECT_EQ("/* long long long\n"
+            " * long long long */",
+            format("/* long long long long\n"
+                   " * long\n"
+                   " * long */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Put the postfix of the last short reflow line on a newline if it doesn't
+  // fit.
+  EXPECT_EQ("/* long long long\n"
+            " * long long longg\n"
+            " */",
+            format("/* long long long long\n"
+                   " * long\n"
+                   " * longg */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Break single line block comments that are first in the line with ' *'
+  // decoration.
+  EXPECT_EQ("/* long long long\n"
+            " * long */",
+            format("/* long long long long */", getLLVMStyleWithColumns(20)));
+
+  // Break single line block comment that are not first in the line with '  '
+  // decoration.
+  EXPECT_EQ("int i; /* long long\n"
+            "          long */",
+            format("int i; /* long long long */", getLLVMStyleWithColumns(20)));
+
+  // Reflow a line that goes just over the column limit.
+  EXPECT_EQ("// long long long\n"
+            "// lon long",
+            format("// long long long lon\n"
+                   "// long",
+                   getLLVMStyleWithColumns(20)));
+
+  // Stop reflowing if the next line has a different indentation than the
+  // previous line.
+  EXPECT_EQ("// long long long\n"
+            "// long\n"
+            "//  long long\n"
+            "//  long",
+            format("// long long long long\n"
+                   "//  long long\n"
+                   "//  long",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow into the last part of a really long line that has been broken into
+  // multiple lines.
+  EXPECT_EQ("// long long long\n"
+            "// long long long\n"
+            "// long long long\n",
+            format("// long long long long long long long long\n"
+                   "// long\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Break the first line, then reflow the beginning of the second and third
+  // line up.
+  EXPECT_EQ("// long long long\n"
+            "// lon1 lon2 lon2\n"
+            "// lon2 lon3 lon3",
+            format("// long long long lon1\n"
+                   "// lon2 lon2 lon2\n"
+                   "// lon3 lon3",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow the beginning of the second line, then break the rest.
+  EXPECT_EQ("// long long long\n"
+            "// lon1 lon2 lon2\n"
+            "// lon2 lon2 lon2\n"
+            "// lon3",
+            format("// long long long lon1\n"
+                   "// lon2 lon2 lon2 lon2 lon2 lon3",
+                   getLLVMStyleWithColumns(20)));
+
+  // Shrink the first line, then reflow the second line up.
+  EXPECT_EQ("// long long long", format("// long              long\n"
+                                        "// long",
+                                        getLLVMStyleWithColumns(20)));
+
+  // Don't shrink leading whitespace.
+  EXPECT_EQ("int i; ///           a",
+            format("int i; ///           a", getLLVMStyleWithColumns(20)));
+
+  // Shrink trailing whitespace if there is no postfix and reflow.
+  EXPECT_EQ("// long long long\n"
+            "// long long",
+            format("// long long long long    \n"
+                   "// long",
+                   getLLVMStyleWithColumns(20)));
+
+  // Shrink trailing whitespace to a single one if there is postfix.
+  EXPECT_EQ("/* long long long */",
+            format("/* long long long     */", getLLVMStyleWithColumns(20)));
+
+  // Break a block comment postfix if exceeding the line limit.
+  EXPECT_EQ("/*               long\n"
+            " */",
+            format("/*               long */", getLLVMStyleWithColumns(20)));
+
+  // Reflow indented comments.
+  EXPECT_EQ("{\n"
+            "  // long long long\n"
+            "  // long long\n"
+            "  int i; /* long lon\n"
+            "            g long\n"
+            "          */\n"
+            "}",
+            format("{\n"
+                   "  // long long long long\n"
+                   "  // long\n"
+                   "  int i; /* long lon g\n"
+                   "            long */\n"
+                   "}",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't realign trailing comments after reflow has happened.
+  EXPECT_EQ("// long long long\n"
+            "// long long\n"
+            "long i; // long",
+            format("// long long long long\n"
+                   "// long\n"
+                   "long i; // long",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("// long long long\n"
+            "// longng long long\n"
+            "// long lo",
+            format("// long long long longng\n"
+                   "// long long long\n"
+                   "// lo",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow lines after a broken line.
+  EXPECT_EQ("int a; // Trailing\n"
+            "       // comment on\n"
+            "       // 2 or 3\n"
+            "       // lines.\n",
+            format("int a; // Trailing comment\n"
+                   "       // on 2\n"
+                   "       // or 3\n"
+                   "       // lines.\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/// This long line\n"
+            "/// gets reflown.\n",
+            format("/// This long line gets\n"
+                   "/// reflown.\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! This long line\n"
+            "//! gets reflown.\n",
+            format(" //! This long line gets\n"
+                   " //! reflown.\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* This long line\n"
+            " * gets reflown.\n"
+            " */\n",
+            format("/* This long line gets\n"
+                   " * reflown.\n"
+                   " */\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow after indentation makes a line too long.
+  EXPECT_EQ("{\n"
+            "  // long long long\n"
+            "  // lo long\n"
+            "}\n",
+            format("{\n"
+                   "// long long long lo\n"
+                   "// long\n"
+                   "}\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Break and reflow multiple lines.
+  EXPECT_EQ("/*\n"
+            " * Reflow the end of\n"
+            " * line by 11 22 33\n"
+            " * 4.\n"
+            " */\n",
+            format("/*\n"
+                   " * Reflow the end of line\n"
+                   " * by\n"
+                   " * 11\n"
+                   " * 22\n"
+                   " * 33\n"
+                   " * 4.\n"
+                   " */\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/// First line gets\n"
+            "/// broken. Second\n"
+            "/// line gets\n"
+            "/// reflown and\n"
+            "/// broken. Third\n"
+            "/// gets reflown.\n",
+            format("/// First line gets broken.\n"
+                   "/// Second line gets reflown and broken.\n"
+                   "/// Third gets reflown.\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("int i; // first long\n"
+            "       // long snd\n"
+            "       // long.\n",
+            format("int i; // first long long\n"
+                   "       // snd long.\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("{\n"
+            "  // first long line\n"
+            "  // line second\n"
+            "  // long line line\n"
+            "  // third long line\n"
+            "  // line\n"
+            "}\n",
+            format("{\n"
+                   "  // first long line line\n"
+                   "  // second long line line\n"
+                   "  // third long line line\n"
+                   "}\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("int i; /* first line\n"
+            "        * second\n"
+            "        * line third\n"
+            "        * line\n"
+            "        */",
+            format("int i; /* first line\n"
+                   "        * second line\n"
+                   "        * third line\n"
+                   "        */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Reflow the last two lines of a section that starts with a line having
+  // different indentation.
+  EXPECT_EQ(
+      "//     long\n"
+      "// long long long\n"
+      "// long long",
+      format("//     long\n"
+             "// long long long long\n"
+             "// long",
+             getLLVMStyleWithColumns(20)));
+
+  // Keep the block comment endling '*/' while reflowing.
+  EXPECT_EQ("/* Long long long\n"
+            " * line short */\n",
+            format("/* Long long long line\n"
+                   " * short */\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't reflow between separate blocks of comments.
+  EXPECT_EQ("/* First comment\n"
+            " * block will */\n"
+            "/* Snd\n"
+            " */\n",
+            format("/* First comment block\n"
+                   " * will */\n"
+                   "/* Snd\n"
+                   " */\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't reflow across blank comment lines.
+  EXPECT_EQ("int i; // This long\n"
+            "       // line gets\n"
+            "       // broken.\n"
+            "       //  \n"
+            "       // keep.\n",
+            format("int i; // This long line gets broken.\n"
+                   "       //  \n"
+                   "       // keep.\n",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("{\n"
+            "  /// long long long\n"
+            "  /// long long\n"
+            "  ///\n"
+            "  /// long\n"
+            "}",
+            format("{\n"
+                   "  /// long long long long\n"
+                   "  /// long\n"
+                   "  ///\n"
+                   "  /// long\n"
+                   "}",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//! long long long\n"
+            "//! long\n"
+            "\n"
+            "//! long",
+            format("//! long long long long\n"
+                   "\n"
+                   "//! long",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+            "   long\n"
+            "\n"
+            "   long */",
+            format("/* long long long long\n"
+                   "\n"
+                   "   long */",
+                   getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* long long long\n"
+            " * long\n"
+            " *\n"
+            " * long */",
+            format("/* long long long long\n"
+                   " *\n"
+                   " * long */",
+                   getLLVMStyleWithColumns(20)));
+  
+  // Don't reflow lines having content that is a single character.
+  EXPECT_EQ("// long long long\n"
+            "// long\n"
+            "// l",
+            format("// long long long long\n"
+                   "// l",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't reflow lines starting with two punctuation characters.
+  EXPECT_EQ("// long long long\n"
+            "// long\n"
+            "// ... --- ...",
+            format(
+                "// long long long long\n"
+                "// ... --- ...",
+                getLLVMStyleWithColumns(20)));
+  // Reflow lines that have a non-punctuation character among their first 2
+  // characters.
+  EXPECT_EQ("// long long long\n"
+            "// long 'long'",
+            format(
+                "// long long long long\n"
+                "// 'long'",
+                getLLVMStyleWithColumns(20)));
+
+  // Don't reflow between separate blocks of comments.
+  EXPECT_EQ("/* First comment\n"
+            " * block will */\n"
+            "/* Snd\n"
+            " */\n",
+            format("/* First comment block\n"
+                   " * will */\n"
+                   "/* Snd\n"
+                   " */\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't reflow lines having different indentation.
+  EXPECT_EQ("// long long long\n"
+            "// long\n"
+            "//  long",
+            format("// long long long long\n"
+                   "//  long",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't break or reflow after implicit string literals.
+  verifyFormat("#include <t> // l l l\n"
+               "             // l",
+               getLLVMStyleWithColumns(20));
+
+  // Don't break or reflow comments on import lines.
+  EXPECT_EQ("#include \"t\" /* l l l\n"
+            "                * l */",
+            format("#include \"t\" /* l l l\n"
+                   "                * l */",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't reflow between different trailing comment sections.
+  EXPECT_EQ("int i; // long long\n"
+            "       // long\n"
+            "int j; // long long\n"
+            "       // long\n",
+            format("int i; // long long long\n"
+                   "int j; // long long long\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Don't reflow if the first word on the next line is longer than the
+  // available space at current line.
+  EXPECT_EQ("int i; // trigger\n"
+            "       // reflow\n"
+            "       // longsec\n",
+            format("int i; // trigger reflow\n"
+                   "       // longsec\n",
+                   getLLVMStyleWithColumns(20)));
+
+  // Keep empty comment lines.
+  EXPECT_EQ("/**/", format(" /**/", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/* */", format(" /* */", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("/*  */", format(" /*  */", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("//", format(" //  ", getLLVMStyleWithColumns(20)));
+  EXPECT_EQ("///", format(" ///  ", getLLVMStyleWithColumns(20)));
+}
+
 TEST_F(FormatTest, IgnoresIf0Contents) {
   EXPECT_EQ("#if 0\n"
             "}{)(&*(^%%#%@! fsadj f;ldjs ,:;| <<<>>>][)(][\n"
@@ -7238,11 +7695,9 @@ TEST_F(FormatTest, BlockCommentsAtEndOfL
             format("a = {1111 /*      */\n"
                    "};",
                    getLLVMStyleWithColumns(15)));
-
-  // FIXME: The formatting is still wrong here.
   EXPECT_EQ("a = {\n"
             "    1111 /*      a\n"
-            "            */\n"
+            "          */\n"
             "};",
             format("a = {1111 /*      a */\n"
                    "};",
@@ -10933,6 +11388,26 @@ TEST_F(FormatTest, DisableRegions) {
                    "  int j;\n"
                    " /* clang-format on */\n"
                    "   int   k;"));
+
+  // Don't reflow comments within disabled regions.
+  EXPECT_EQ(
+      "// clang-format off\n"
+      "// long long long long long long line\n"
+      "/* clang-format on */\n"
+      "/* long long long\n"
+      " * long long long\n"
+      " * line */\n"
+      "int i;\n"
+      "/* clang-format off */\n"
+      "/* long long long long long long line */\n",
+      format("// clang-format off\n"
+             "// long long long long long long line\n"
+             "/* clang-format on */\n"
+             "/* long long long long long long line */\n"
+             "int i;\n"
+             "/* clang-format off */\n"
+             "/* long long long long long long line */\n",
+             getLLVMStyleWithColumns(20)));
 }
 
 TEST_F(FormatTest, DoNotCrashOnInvalidInput) {

Modified: cfe/trunk/unittests/Format/FormatTestSelective.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTestSelective.cpp?rev=293055&r1=293054&r2=293055&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTestSelective.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTestSelective.cpp Wed Jan 25 07:58:58 2017
@@ -111,13 +111,19 @@ TEST_F(FormatTestSelective, FormatsComme
             format("int   a; // comment\n"
                    "int    b; // comment",
                    0, 0));
-  EXPECT_EQ("int   a; // comment\n"
-            "         // line 2\n"
+  EXPECT_EQ("int a; // comment\n"
+            "       // line 2\n"
             "int b;",
             format("int   a; // comment\n"
                    "            // line 2\n"
                    "int b;",
                    28, 0));
+  EXPECT_EQ("int   a; // comment\n"
+            "// comment 2\n"
+            "int b;",
+            format("int   a; // comment\n"
+                   "// comment 2\n"
+                   "int b;", 28, 0));
   EXPECT_EQ("int aaaaaa; // comment\n"
             "int b;\n"
             "int c; // unrelated comment",