r182738 - Major refactoring of BreakableToken.

Mon May 27 08:23:34 PDT 2013

Author: klimek
Date: Mon May 27 10:23:34 2013
New Revision: 182738

URL: http://llvm.org/viewvc/llvm-project?rev=182738&view=rev
Log:
Major refactoring of BreakableToken.

Unify handling of whitespace when breaking protruding tokens with other
whitespace replacements.

As a side effect, the BreakableToken structure changed significantly:
- have a common base class for single-line breakable tokens, as they are
  much more similar
- revamp handling of multi-line comments; we now calculate the
  information about lines in multi-line comments similar to normal
  tokens, and always issue replacements

As a result, we were able to get rid of special casing of trailing
whitespace deletion for comments in the whitespace manager and the
BreakableToken and fixed bugs related to tab handling and escaped
newlines.

Modified:
    cfe/trunk/lib/Format/BreakableToken.cpp
    cfe/trunk/lib/Format/BreakableToken.h
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.h
    cfe/trunk/lib/Format/WhitespaceManager.cpp
    cfe/trunk/lib/Format/WhitespaceManager.h
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/BreakableToken.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=182738&r1=182737&r2=182738&view=diff
==============================================================================

--- cfe/trunk/lib/Format/BreakableToken.cpp (original)
+++ cfe/trunk/lib/Format/BreakableToken.cpp Mon May 27 10:23:34 2013
@@ -13,27 +13,82 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "format-token-breaker"
+
 #include "BreakableToken.h"
+#include "clang/Format/Format.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
 #include <algorithm>
 
 namespace clang {
 namespace format {
+namespace {
+
+// FIXME: Move helper string functions to where it makes sense.
+
+unsigned getOctalLength(StringRef Text) {
+  unsigned I = 1;
+  while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
+    ++I;
+  }
+  return I;
+}
+
+unsigned getHexLength(StringRef Text) {
+  unsigned I = 2; // Point after '\x'.
+  while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
+                             (Text[I] >= 'a' && Text[I] <= 'f') ||
+                             (Text[I] >= 'A' && Text[I] <= 'F'))) {
+    ++I;
+  }
+  return I;
+}
 
-BreakableToken::Split BreakableComment::getSplit(unsigned LineIndex,
-                                                 unsigned TailOffset,
-                                                 unsigned ColumnLimit) const {
-  StringRef Text = getLine(LineIndex).substr(TailOffset);
-  unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset);
+unsigned getEscapeSequenceLength(StringRef Text) {
+  assert(Text[0] == '\\');
+  if (Text.size() < 2)
+    return 1;
+
+  switch (Text[1]) {
+  case 'u':
+    return 6;
+  case 'U':
+    return 10;
+  case 'x':
+    return getHexLength(Text);
+  default:
+    if (Text[1] >= '0' && Text[1] <= '7')
+      return getOctalLength(Text);
+    return 2;
+  }
+}
+
+StringRef::size_type getStartOfCharacter(StringRef Text,
+                                         StringRef::size_type Offset) {
+  StringRef::size_type NextEscape = Text.find('\\');
+  while (NextEscape != StringRef::npos && NextEscape < Offset) {
+    StringRef::size_type SequenceLength =
+        getEscapeSequenceLength(Text.substr(NextEscape));
+    if (Offset < NextEscape + SequenceLength)
+      return NextEscape;
+    NextEscape = Text.find('\\', NextEscape + SequenceLength);
+  }
+  return Offset;
+}
+
+BreakableToken::Split getCommentSplit(StringRef Text,
+                                      unsigned ContentStartColumn,
+                                      unsigned ColumnLimit) {
   if (ColumnLimit <= ContentStartColumn + 1)
-    return Split(StringRef::npos, 0);
+    return BreakableToken::Split(StringRef::npos, 0);
 
   unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
   StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
   if (SpaceOffset == StringRef::npos ||
-      Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) {
+      // Don't break at leading whitespace.
+      Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos)
     SpaceOffset = Text.find(' ', MaxSplit);
-  }
   if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
     StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim();
     StringRef AfterCut = Text.substr(SpaceOffset).ltrim();
@@ -43,142 +98,247 @@ BreakableToken::Split BreakableComment::
   return BreakableToken::Split(StringRef::npos, 0);
 }
 
-void BreakableComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
-                                   Split Split, bool InPPDirective,
-                                   WhitespaceManager &Whitespaces) {
-  StringRef Text = getLine(LineIndex).substr(TailOffset);
-  StringRef AdditionalPrefix = Decoration;
-  if (Text.size() == Split.first + Split.second) {
-    // For all but the last line handle trailing space in trimLine.
-    if (LineIndex < Lines.size() - 1)
-      return;
-    // For the last line we need to break before "*/", but not to add "* ".
-    AdditionalPrefix = "";
-  }
+BreakableToken::Split getStringSplit(StringRef Text,
+                                     unsigned ContentStartColumn,
+                                     unsigned ColumnLimit) {
+
+  if (ColumnLimit <= ContentStartColumn)
+    return BreakableToken::Split(StringRef::npos, 0);
+  unsigned MaxSplit = ColumnLimit - ContentStartColumn;
+  // FIXME: Reduce unit test case.
+  if (Text.empty())
+    return BreakableToken::Split(StringRef::npos, 0);
+  MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1);
+  StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
+  if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
+    return BreakableToken::Split(SpaceOffset + 1, 0);
+  StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
+  if (SlashOffset != StringRef::npos && SlashOffset != 0)
+    return BreakableToken::Split(SlashOffset + 1, 0);
+  StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
+  if (SplitPoint == StringRef::npos || SplitPoint == 0)
+    return BreakableToken::Split(StringRef::npos, 0);
+  return BreakableToken::Split(SplitPoint, 0);
+}
 
-  unsigned BreakOffset = Text.data() - TokenText.data() + Split.first;
-  unsigned CharsToRemove = Split.second;
-  Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix,
-                         InPPDirective, IndentAtLineBreak);
+} // namespace
+
+unsigned BreakableSingleLineToken::getLineCount() const { return 1; }
+
+unsigned
+BreakableSingleLineToken::getLineLengthAfterSplit(unsigned LineIndex,
+                                                  unsigned TailOffset) const {
+  return StartColumn + Prefix.size() + Postfix.size() + Line.size() -
+         TailOffset;
 }
 
-BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr,
-                                             const AnnotatedToken &Token,
-                                             unsigned StartColumn)
-    : BreakableComment(SourceMgr, Token.FormatTok, StartColumn + 2) {
-  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
+void BreakableSingleLineToken::insertBreak(unsigned LineIndex,
+                                           unsigned TailOffset, Split Split,
+                                           bool InPPDirective,
+                                           WhitespaceManager &Whitespaces) {
+  Whitespaces.breakToken(Tok, Prefix.size() + TailOffset + Split.first,
+                         Split.second, Postfix, Prefix, InPPDirective,
+                         StartColumn);
+}
 
-  OriginalStartColumn =
-      SourceMgr.getSpellingColumnNumber(Tok.getStartOfNonWhitespace()) - 1;
+BreakableSingleLineToken::BreakableSingleLineToken(const FormatToken &Tok,
+                                                   unsigned StartColumn,
+                                                   StringRef Prefix,
+                                                   StringRef Postfix)
+    : BreakableToken(Tok), StartColumn(StartColumn), Prefix(Prefix),
+      Postfix(Postfix) {
+  assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
+  Line = Tok.TokenText.substr(
+      Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
+}
+
+BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok,
+                                               unsigned StartColumn)
+    : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"") {}
+
+BreakableToken::Split
+BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset,
+                                 unsigned ColumnLimit) const {
+  return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit);
+}
 
+static StringRef getLineCommentPrefix(StringRef Comment) {
+  const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
+  for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i)
+    if (Comment.startswith(KnownPrefixes[i]))
+      return KnownPrefixes[i];
+  return "";
+}
+
+BreakableLineComment::BreakableLineComment(const FormatToken &Token,
+                                           unsigned StartColumn)
+    : BreakableSingleLineToken(Token, StartColumn,
+                               getLineCommentPrefix(Token.TokenText), "") {}
+
+BreakableToken::Split
+BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+                               unsigned ColumnLimit) const {
+  return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(),
+                         ColumnLimit);
+}
+
+BreakableBlockComment::BreakableBlockComment(const FormatStyle &Style,
+                                             const FormatToken &Token,
+                                             unsigned StartColumn,
+                                             unsigned OriginalStartColumn,
+                                             bool FirstInLine)
+    : BreakableToken(Token) {
+  StringRef TokenText(Token.TokenText);
+  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
   TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
 
+  int IndentDelta = StartColumn - OriginalStartColumn;
   bool NeedsStar = true;
-  CommonPrefixLength = UINT_MAX;
-  if (Lines.size() == 1) {
-    if (Token.Parent == 0) {
-      // Standalone block comments will be aligned and prefixed with *s.
-      CommonPrefixLength = OriginalStartColumn + 1;
-    } else {
-      // Trailing comments can start on arbitrary column, and available
-      // horizontal space can be too small to align consecutive lines with
-      // the first one. We could, probably, align them to current
-      // indentation level, but now we just wrap them without indentation
-      // and stars.
-      CommonPrefixLength = 0;
-      NeedsStar = false;
+  LeadingWhitespace.resize(Lines.size());
+  StartOfLineColumn.resize(Lines.size());
+  if (Lines.size() == 1 && !FirstInLine) {
+    // Comments for which FirstInLine is false can start on arbitrary column,
+    // and available horizontal space can be too small to align consecutive
+    // lines with the first one.
+    // FIXME: We could, probably, align them to current indentation level, but
+    // now we just wrap them without stars.
+    NeedsStar = false;
+  }
+  StartOfLineColumn[0] = StartColumn + 2;
+  for (size_t i = 1; i < Lines.size(); ++i) {
+    adjustWhitespace(Style, i, IndentDelta);
+    if (Lines[i].empty())
+      // If the last line is empty, the closing "*/" will have a star.
+      NeedsStar = NeedsStar && i + 1 == Lines.size();
+    else
+      NeedsStar = NeedsStar && Lines[i][0] == '*';
+  }
+  Decoration = NeedsStar ? "* " : "";
+  IndentAtLineBreak = StartOfLineColumn[0] + 1;
+  for (size_t i = 1; i < Lines.size(); ++i) {
+    if (Lines[i].empty()) {
+      if (!NeedsStar && i + 1 != Lines.size())
+        // For all but the last line (which always ends in */), set the
+        // start column to 0 if they're empty, so we do not insert
+        // trailing whitespace anywhere.
+        StartOfLineColumn[i] = 0;
+      continue;
     }
-  } else {
-    for (size_t i = 1; i < Lines.size(); ++i) {
-      size_t FirstNonWhitespace = Lines[i].find_first_not_of(" ");
-      if (FirstNonWhitespace != StringRef::npos) {
-        NeedsStar = NeedsStar && (Lines[i][FirstNonWhitespace] == '*');
-        CommonPrefixLength =
-            std::min<unsigned>(CommonPrefixLength, FirstNonWhitespace);
-      }
+    if (NeedsStar) {
+      // The first line already excludes the star.
+      // For all other lines, adjust the line to exclude the star and
+      // (optionally) the first whitespace.
+      int Offset = Lines[i].startswith("* ") ? 2 : 1;
+      StartOfLineColumn[i] += Offset;
+      Lines[i] = Lines[i].substr(Offset);
+      LeadingWhitespace[i] += Offset;
     }
+    IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]);
   }
-  if (CommonPrefixLength == UINT_MAX)
-    CommonPrefixLength = 0;
+  DEBUG({
+    for (size_t i = 0; i < Lines.size(); ++i) {
+      llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i]
+                   << "\n";
+    }
+  });
+}
 
-  Decoration = NeedsStar ? "* " : "";
+void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style,
+                                             unsigned LineIndex,
+                                             int IndentDelta) {
+  // Calculate the end of the non-whitespace text in the previous line.
+  size_t EndOfPreviousLine = Lines[LineIndex - 1].find_last_not_of(" \\\t");
+  if (EndOfPreviousLine == StringRef::npos)
+    EndOfPreviousLine = 0;
+  else
+    ++EndOfPreviousLine;
+  // Calculate the start of the non-whitespace text in the current line.
+  size_t StartOfLine = Lines[LineIndex].find_first_not_of(" \t");
+  if (StartOfLine == StringRef::npos)
+    StartOfLine = Lines[LineIndex].size();
+  // FIXME: Tabs are not always 8 characters. Make configurable in the style.
+  unsigned Column = 0;
+  StringRef OriginalIndentText = Lines[LineIndex].substr(0, StartOfLine);
+  for (int i = 0, e = OriginalIndentText.size(); i != e; ++i) {
+    if (Lines[LineIndex][i] == '\t')
+      Column += 8 - (Column % 8);
+    else
+      ++Column;
+  }
 
-  IndentAtLineBreak =
-      std::max<int>(StartColumn - OriginalStartColumn + CommonPrefixLength, 0);
+  // Adjust Lines to only contain relevant text.
+  Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine);
+  Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine);
+  // Adjust LeadingWhitespace to account all whitespace between the lines
+  // to the current line.
+  LeadingWhitespace[LineIndex] =
+      Lines[LineIndex].begin() - Lines[LineIndex - 1].end();
+  // Adjust the start column uniformly accross all lines.
+  StartOfLineColumn[LineIndex] = std::max<int>(0, Column + IndentDelta);
 }
 
-void BreakableBlockComment::alignLines(WhitespaceManager &Whitespaces) {
-  SourceLocation TokenLoc = Tok.getStartOfNonWhitespace();
-  int IndentDelta = (StartColumn - 2) - OriginalStartColumn;
-  if (IndentDelta > 0) {
-    std::string WhiteSpace(IndentDelta, ' ');
-    for (size_t i = 1; i < Lines.size(); ++i) {
-      Whitespaces.addReplacement(
-          TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), 0,
-          WhiteSpace);
-    }
-  } else if (IndentDelta < 0) {
-    std::string WhiteSpace(-IndentDelta, ' ');
-    // Check that the line is indented enough.
-    for (size_t i = 1; i < Lines.size(); ++i) {
-      if (!Lines[i].startswith(WhiteSpace))
-        return;
-    }
-    for (size_t i = 1; i < Lines.size(); ++i) {
-      Whitespaces.addReplacement(
-          TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()),
-          -IndentDelta, "");
-    }
-  }
+unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); }
 
-  for (unsigned i = 1; i < Lines.size(); ++i)
-    Lines[i] = Lines[i].substr(CommonPrefixLength + Decoration.size());
+unsigned
+BreakableBlockComment::getLineLengthAfterSplit(unsigned LineIndex,
+                                               unsigned TailOffset) const {
+  return getContentStartColumn(LineIndex, TailOffset) +
+         (Lines[LineIndex].size() - TailOffset) +
+         // The last line gets a "*/" postfix.
+         (LineIndex + 1 == Lines.size() ? 2 : 0);
 }
 
-void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset,
-                                     unsigned InPPDirective,
-                                     WhitespaceManager &Whitespaces) {
-  if (LineIndex == Lines.size() - 1)
-    return;
-  StringRef Text = Lines[LineIndex].substr(TailOffset);
+BreakableToken::Split
+BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset,
+                                unsigned ColumnLimit) const {
+  return getCommentSplit(Lines[LineIndex].substr(TailOffset),
+                         getContentStartColumn(LineIndex, TailOffset),
+                         ColumnLimit);
+}
 
-  // FIXME: The algorithm for trimming a line should naturally yield a
-  // non-change if there is nothing to trim; removing this line breaks the
-  // algorithm; investigate the root cause, and make sure to either document
-  // why exactly this is needed for remove it.
-  if (!Text.endswith(" ") && !InPPDirective)
-    return;
+void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
+                                        Split Split, bool InPPDirective,
+                                        WhitespaceManager &Whitespaces) {
+  StringRef Text = Lines[LineIndex].substr(TailOffset);
+  StringRef Prefix = Decoration;
+  if (LineIndex + 1 == Lines.size() &&
+      Text.size() == Split.first + Split.second) {
+    // For the last line we need to break before "*/", but not to add "* ".
+    Prefix = "";
+  }
 
-  StringRef TrimmedLine = Text.rtrim();
-  unsigned BreakOffset = TrimmedLine.end() - TokenText.data();
-  unsigned CharsToRemove = Text.size() - TrimmedLine.size() + 1;
-  // FIXME: It seems like we're misusing the call to breakToken to remove
-  // whitespace instead of breaking a token. We should make this an explicit
-  // call option to the WhitespaceManager, or handle trimming and alignment
-  // of comments completely within in the WhitespaceManger. Passing '0' here
-  // and relying on this not breaking assumptions of the WhitespaceManager seems
-  // like a bad idea.
-  Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", "", InPPDirective,
-                         0);
+  unsigned BreakOffsetInToken =
+      Text.data() - Tok.TokenText.data() + Split.first;
+  unsigned CharsToRemove = Split.second;
+  Whitespaces.breakToken(Tok, BreakOffsetInToken, CharsToRemove, "", Prefix,
+                         InPPDirective, IndentAtLineBreak - Decoration.size());
 }
 
-BreakableLineComment::BreakableLineComment(const SourceManager &SourceMgr,
-                                           const AnnotatedToken &Token,
-                                           unsigned StartColumn)
-    : BreakableComment(SourceMgr, Token.FormatTok, StartColumn) {
-  assert(TokenText.startswith("//"));
-  Decoration = getLineCommentPrefix(TokenText);
-  Lines.push_back(TokenText.substr(Decoration.size()));
-  IndentAtLineBreak = StartColumn;
-  this->StartColumn += Decoration.size(); // Start column of the contents.
+void
+BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex,
+                                               unsigned InPPDirective,
+                                               WhitespaceManager &Whitespaces) {
+  if (LineIndex == 0)
+    return;
+  StringRef Prefix = Decoration;
+  if (LineIndex + 1 == Lines.size() && Lines[LineIndex].empty())
+    Prefix = "";
+
+  unsigned WhitespaceOffsetInToken =
+      Lines[LineIndex].data() - Tok.TokenText.data() -
+      LeadingWhitespace[LineIndex];
+  Whitespaces.breakToken(
+      Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix,
+      InPPDirective, StartOfLineColumn[LineIndex] - Prefix.size());
 }
 
-StringRef BreakableLineComment::getLineCommentPrefix(StringRef Comment) {
-  const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
-  for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i)
-    if (Comment.startswith(KnownPrefixes[i]))
-      return KnownPrefixes[i];
-  return "";
+unsigned
+BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
+                                             unsigned TailOffset) const {
+  // If we break, we always break at the predefined indent.
+  if (TailOffset != 0)
+    return IndentAtLineBreak;
+  return StartOfLineColumn[LineIndex];
 }
 
 } // namespace format

Modified: cfe/trunk/lib/Format/BreakableToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.h?rev=182738&r1=182737&r2=182738&view=diff
==============================================================================
--- cfe/trunk/lib/Format/BreakableToken.h (original)
+++ cfe/trunk/lib/Format/BreakableToken.h Mon May 27 10:23:34 2013
@@ -24,215 +24,174 @@
 namespace clang {
 namespace format {
 
+struct FormatStyle;
+
+/// \brief Base class for strategies on how to break tokens.
+///
+/// FIXME: The interface seems set in stone, so we might want to just pull the
+/// strategy into the class, instead of controlling it from the outside.
 class BreakableToken {
 public:
-  BreakableToken(const SourceManager &SourceMgr, const FormatToken &Tok,
-                 unsigned StartColumn)
-      : Tok(Tok), StartColumn(StartColumn),
-        TokenText(SourceMgr.getCharacterData(Tok.getStartOfNonWhitespace()),
-                  Tok.TokenLength) {}
+  // Contains starting character index and length of split.
+  typedef std::pair<StringRef::size_type, unsigned> Split;
+
   virtual ~BreakableToken() {}
+
+  /// \brief Returns the number of lines in this token in the original code.
   virtual unsigned getLineCount() const = 0;
-  virtual unsigned getLineSize(unsigned Index) const = 0;
+
+  /// \brief Returns the rest of the length of the line at \p LineIndex,
+  /// when broken at \p TailOffset.
+  ///
+  /// Note that previous breaks are not taken into account. \p TailOffset
+  /// is always specified from the start of the (original) line.
   virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
                                            unsigned TailOffset) const = 0;
 
-  // Contains starting character index and length of split.
-  typedef std::pair<StringRef::size_type, unsigned> Split;
+  /// \brief Returns a range (offset, length) at which to break the line at
+  /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
+  /// violate \p ColumnLimit.
   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                          unsigned ColumnLimit) const = 0;
+
+  /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
                            bool InPPDirective,
                            WhitespaceManager &Whitespaces) = 0;
-  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
-                        unsigned InPPDirective,
-                        WhitespaceManager &Whitespaces) {}
+
+  /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
+  virtual void replaceWhitespaceBefore(unsigned LineIndex,
+                                       unsigned InPPDirective,
+                                       WhitespaceManager &Whitespaces) {}
+
 protected:
+  BreakableToken(const FormatToken &Tok) : Tok(Tok) {}
+
   const FormatToken &Tok;
-  unsigned StartColumn;
-  StringRef TokenText;
 };
 
-class BreakableStringLiteral : public BreakableToken {
+/// \brief Base class for single line tokens that can be broken.
+///
+/// \c getSplit() needs to be implemented by child classes.
+class BreakableSingleLineToken : public BreakableToken {
 public:
-  BreakableStringLiteral(const SourceManager &SourceMgr, const FormatToken &Tok,
-                         unsigned StartColumn)
-      : BreakableToken(SourceMgr, Tok, StartColumn) {
-    assert(TokenText.startswith("\"") && TokenText.endswith("\""));
-  }
-
-  virtual unsigned getLineCount() const { return 1; }
-
-  virtual unsigned getLineSize(unsigned Index) const {
-    return Tok.TokenLength - 2; // Should be in sync with getLine
-  }
-
+  virtual unsigned getLineCount() const;
   virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
-                                           unsigned TailOffset) const {
-    return getDecorationLength() + getLine().size() - TailOffset;
-  }
-
-  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
-                         unsigned ColumnLimit) const {
-    StringRef Text = getLine().substr(TailOffset);
-    if (ColumnLimit <= getDecorationLength())
-      return Split(StringRef::npos, 0);
-    unsigned MaxSplit = ColumnLimit - getDecorationLength();
-    // FIXME: Reduce unit test case.
-    if (Text.empty())
-      return Split(StringRef::npos, 0);
-    MaxSplit = std::min<unsigned>(MaxSplit, Text.size() - 1);
-    StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
-    if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
-      return Split(SpaceOffset + 1, 0);
-    StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
-    if (SlashOffset != StringRef::npos && SlashOffset != 0)
-      return Split(SlashOffset + 1, 0);
-    StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
-    if (SplitPoint != StringRef::npos && SplitPoint > 1)
-      // Do not split at 0.
-      return Split(SplitPoint, 0);
-    return Split(StringRef::npos, 0);
-  }
-
+                                           unsigned TailOffset) const;
   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           bool InPPDirective, WhitespaceManager &Whitespaces) {
-    Whitespaces.breakToken(Tok, 1 + TailOffset + Split.first, Split.second,
-                           "\"", "\"", InPPDirective, StartColumn);
-  }
+                           bool InPPDirective, WhitespaceManager &Whitespaces);
 
-private:
-  StringRef getLine() const {
-    // Get string without quotes.
-    // FIXME: Handle string prefixes.
-    return TokenText.substr(1, TokenText.size() - 2);
-  }
-
-  unsigned getDecorationLength() const { return StartColumn + 2; }
-
-  static StringRef::size_type getStartOfCharacter(StringRef Text,
-                                                  StringRef::size_type Offset) {
-    StringRef::size_type NextEscape = Text.find('\\');
-    while (NextEscape != StringRef::npos && NextEscape < Offset) {
-      StringRef::size_type SequenceLength =
-          getEscapeSequenceLength(Text.substr(NextEscape));
-      if (Offset < NextEscape + SequenceLength)
-        return NextEscape;
-      NextEscape = Text.find('\\', NextEscape + SequenceLength);
-    }
-    return Offset;
-  }
-
-  static unsigned getEscapeSequenceLength(StringRef Text) {
-    assert(Text[0] == '\\');
-    if (Text.size() < 2)
-      return 1;
-
-    switch (Text[1]) {
-    case 'u':
-      return 6;
-    case 'U':
-      return 10;
-    case 'x':
-      return getHexLength(Text);
-    default:
-      if (Text[1] >= '0' && Text[1] <= '7')
-        return getOctalLength(Text);
-      return 2;
-    }
-  }
-
-  static unsigned getHexLength(StringRef Text) {
-    unsigned I = 2; // Point after '\x'.
-    while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
-                               (Text[I] >= 'a' && Text[I] <= 'f') ||
-                               (Text[I] >= 'A' && Text[I] <= 'F'))) {
-      ++I;
-    }
-    return I;
-  }
-
-  static unsigned getOctalLength(StringRef Text) {
-    unsigned I = 1;
-    while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
-      ++I;
-    }
-    return I;
-  }
+protected:
+  BreakableSingleLineToken(const FormatToken &Tok, unsigned StartColumn,
+                           StringRef Prefix, StringRef Postfix);
 
+  // The column in which the token starts.
+  unsigned StartColumn;
+  // The prefix a line needs after a break in the token.
+  StringRef Prefix;
+  // The postfix a line needs before introducing a break.
+  StringRef Postfix;
+  // The token text excluding the prefix and postfix.
+  StringRef Line;
 };
 
-class BreakableComment : public BreakableToken {
+class BreakableStringLiteral : public BreakableSingleLineToken {
 public:
-  virtual unsigned getLineSize(unsigned Index) const {
-    return getLine(Index).size();
-  }
-
-  virtual unsigned getLineCount() const { return Lines.size(); }
-
-  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
-                                           unsigned TailOffset) const {
-    return getContentStartColumn(LineIndex, TailOffset) +
-           getLine(LineIndex).size() - TailOffset;
-  }
+  /// \brief Creates a breakable token for a single line string literal.
+  ///
+  /// \p StartColumn specifies the column in which the token will start
+  /// after formatting.
+  BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn);
 
   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
                          unsigned ColumnLimit) const;
-  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
-                           bool InPPDirective, WhitespaceManager &Whitespaces);
+};
 
-protected:
-  BreakableComment(const SourceManager &SourceMgr, const FormatToken &Tok,
-                   unsigned StartColumn)
-      : BreakableToken(SourceMgr, Tok, StartColumn) {}
-
-  // Get comment lines without /* */, common prefix and trailing whitespace.
-  // Last line is not trimmed, as it is terminated by */, so its trailing
-  // whitespace is not really trailing.
-  StringRef getLine(unsigned Index) const {
-    return Index < Lines.size() - 1 ? Lines[Index].rtrim() : Lines[Index];
-  }
-
-  unsigned getContentStartColumn(unsigned LineIndex,
-                                 unsigned TailOffset) const {
-    return (TailOffset == 0 && LineIndex == 0)
-               ? StartColumn
-               : IndentAtLineBreak + Decoration.size();
-  }
+class BreakableLineComment : public BreakableSingleLineToken {
+public:
+  /// \brief Creates a breakable token for a line comment.
+  ///
+  /// \p StartColumn specifies the column in which the comment will start
+  /// after formatting.
+  BreakableLineComment(const FormatToken &Token, unsigned StartColumn);
 
-  unsigned IndentAtLineBreak;
-  StringRef Decoration;
-  SmallVector<StringRef, 16> Lines;
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit) const;
 };
 
-class BreakableBlockComment : public BreakableComment {
+class BreakableBlockComment : public BreakableToken {
 public:
-  BreakableBlockComment(const SourceManager &SourceMgr,
-                        const AnnotatedToken &Token, unsigned StartColumn);
-
-  void alignLines(WhitespaceManager &Whitespaces);
+  /// \brief Creates a breakable token for a block comment.
+  ///
+  /// \p StartColumn specifies the column in which the comment will start
+  /// after formatting, while \p OriginalStartColumn specifies in which
+  /// column the comment started before formatting.
+  /// If the comment starts a line after formatting, set \p FirstInLine to true.
+  BreakableBlockComment(const FormatStyle &Style, const FormatToken &Token,
+                        unsigned StartColumn, unsigned OriginaStartColumn,
+                        bool FirstInLine);
 
+  virtual unsigned getLineCount() const;
   virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
-                                           unsigned TailOffset) const {
-    return BreakableComment::getLineLengthAfterSplit(LineIndex, TailOffset) +
-           (LineIndex + 1 < Lines.size() ? 0 : 2);
-  }
-
-  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
-                        unsigned InPPDirective, WhitespaceManager &Whitespaces);
+                                           unsigned TailOffset) const;
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit) const;
+  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                           bool InPPDirective, WhitespaceManager &Whitespaces);
+  virtual void replaceWhitespaceBefore(unsigned LineIndex,
+                                       unsigned InPPDirective,
+                                       WhitespaceManager &Whitespaces);
 
 private:
-  unsigned OriginalStartColumn;
-  unsigned CommonPrefixLength;
-};
+  // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
+  // so that all whitespace between the lines is accounted to Lines[LineIndex]
+  // as leading whitespace:
+  // - Lines[LineIndex] points to the text after that whitespace
+  // - Lines[LineIndex-1] shrinks by its trailing whitespace
+  // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
+  //   between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
+  //
+  // Sets StartOfLineColumn to the intended column in which the text at
+  // Lines[LineIndex] starts (note that the decoration, if present, is not
+  // considered part of the text).
+  void adjustWhitespace(const FormatStyle &Style, unsigned LineIndex,
+                        int IndentDelta);
+
+  // Returns the column at which the text in line LineIndex starts, when broken
+  // at TailOffset. Note that the decoration (if present) is not considered part
+  // of the text.
+  unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
+
+  // Contains the text of the lines of the block comment, excluding the leading
+  // /* in the first line and trailing */ in the last line, and excluding all
+  // trailing whitespace between the lines. Note that the decoration (if
+  // present) is also not considered part of the text.
+  SmallVector<StringRef, 16> Lines;
 
-class BreakableLineComment : public BreakableComment {
-public:
-  BreakableLineComment(const SourceManager &SourceMgr,
-                       const AnnotatedToken &Token, unsigned StartColumn);
+  // LeadingWhitespace[i] is the number of characters regarded as whitespace in
+  // front of Lines[i]. Note that this can include "* " sequences, which we
+  // regard as whitespace when all lines have a "*" prefix.
+  SmallVector<unsigned, 16> LeadingWhitespace;
+
+  // StartOfLineColumn[i] is the target column at which Line[i] should be.
+  // Note that this excludes a leading "* " or "*" in case all lines have
+  // a "*" prefix.
+  SmallVector<unsigned, 16> StartOfLineColumn;
+
+  // The column at which the text of a broken line should start.
+  // Note that an optional decoration would go before that column.
+  // IndentAtLineBreak is a uniform position for all lines in a block comment,
+  // regardless of their relative position.
+  // FIXME: Revisit the decision to do this; the main reason was to support
+  // patterns like
+  // /**************//**
+  //  * Comment
+  // We could also support such patterns by special casing the first line
+  // instead.
+  unsigned IndentAtLineBreak;
 
-private:
-  static StringRef getLineCommentPrefix(StringRef Comment);
+  // Either "* " if all lines begin with a "*", or empty.
+  StringRef Decoration;
 };
 
 } // namespace format

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=182738&r1=182737&r2=182738&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Mon May 27 10:23:34 2013
@@ -802,6 +802,9 @@ private:
     unsigned UnbreakableTailLength = Current.UnbreakableTailLength;
     llvm::OwningPtr<BreakableToken> Token;
     unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
+    unsigned OriginalStartColumn = SourceMgr.getSpellingColumnNumber(
+        Current.FormatTok.getStartOfNonWhitespace()) - 1;
+
     if (Current.is(tok::string_literal) &&
         Current.Type != TT_ImplicitStringLiteral) {
       // Only break up default narrow strings.
@@ -810,18 +813,16 @@ private:
       if (!LiteralData || *LiteralData != '"')
         return 0;
 
-      Token.reset(new BreakableStringLiteral(SourceMgr, Current.FormatTok,
-                                             StartColumn));
+      Token.reset(new BreakableStringLiteral(Current.FormatTok, StartColumn));
     } else if (Current.Type == TT_BlockComment) {
       BreakableBlockComment *BBC =
-          new BreakableBlockComment(SourceMgr, Current, StartColumn);
-      if (!DryRun)
-        BBC->alignLines(Whitespaces);
+          new BreakableBlockComment(Style, Current.FormatTok, StartColumn,
+                                    OriginalStartColumn, !Current.Parent);
       Token.reset(BBC);
     } else if (Current.Type == TT_LineComment &&
                (Current.Parent == NULL ||
                 Current.Parent->Type != TT_ImplicitStringLiteral)) {
-      Token.reset(new BreakableLineComment(SourceMgr, Current, StartColumn));
+      Token.reset(new BreakableLineComment(Current.FormatTok, StartColumn));
     } else {
       return 0;
     }
@@ -832,8 +833,12 @@ private:
     bool BreakInserted = false;
     unsigned Penalty = 0;
     unsigned PositionAfterLastLineInToken = 0;
-    for (unsigned LineIndex = 0; LineIndex < Token->getLineCount();
-         ++LineIndex) {
+    for (unsigned LineIndex = 0, EndIndex = Token->getLineCount();
+         LineIndex != EndIndex; ++LineIndex) {
+      if (!DryRun) {
+        Token->replaceWhitespaceBefore(LineIndex, Line.InPPDirective,
+                                       Whitespaces);
+      }
       unsigned TailOffset = 0;
       unsigned RemainingTokenLength =
           Token->getLineLengthAfterSplit(LineIndex, TailOffset);
@@ -845,8 +850,7 @@ private:
         assert(Split.first != 0);
         unsigned NewRemainingTokenLength = Token->getLineLengthAfterSplit(
             LineIndex, TailOffset + Split.first + Split.second);
-        if (NewRemainingTokenLength >= RemainingTokenLength)
-          break;
+        assert(NewRemainingTokenLength < RemainingTokenLength);
         if (!DryRun) {
           Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
                              Whitespaces);
@@ -857,9 +861,6 @@ private:
         BreakInserted = true;
       }
       PositionAfterLastLineInToken = RemainingTokenLength;
-      if (!DryRun) {
-        Token->trimLine(LineIndex, TailOffset, Line.InPPDirective, Whitespaces);
-      }
     }
 
     if (BreakInserted) {
@@ -1084,8 +1085,8 @@ private:
 class LexerBasedFormatTokenSource : public FormatTokenSource {
 public:
   LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
-      : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
-        IdentTable(Lex.getLangOpts()) {
+      : GreaterStashed(false), TrailingWhitespace(0), Lex(Lex),
+        SourceMgr(SourceMgr), IdentTable(Lex.getLangOpts()) {
     Lex.SetKeepWhitespaceMode(true);
   }
 
@@ -1102,12 +1103,13 @@ public:
     FormatTok = FormatToken();
     Lex.LexFromRawLexer(FormatTok.Tok);
     StringRef Text = rawTokenText(FormatTok.Tok);
-    SourceLocation WhitespaceStart = FormatTok.Tok.getLocation();
+    SourceLocation WhitespaceStart =
+        FormatTok.Tok.getLocation().getLocWithOffset(-TrailingWhitespace);
     if (SourceMgr.getFileOffset(WhitespaceStart) == 0)
       FormatTok.IsFirst = true;
 
     // Consume and record whitespace until we find a significant token.
-    unsigned WhitespaceLength = 0;
+    unsigned WhitespaceLength = TrailingWhitespace;
     while (FormatTok.Tok.is(tok::unknown)) {
       unsigned Newlines = Text.count('\n');
       if (Newlines > 0)
@@ -1130,9 +1132,10 @@ public:
     // Now FormatTok is the next non-whitespace token.
     FormatTok.TokenLength = Text.size();
 
+    TrailingWhitespace = 0;
     if (FormatTok.Tok.is(tok::comment)) {
-      FormatTok.TrailingWhiteSpaceLength = Text.size() - Text.rtrim().size();
-      FormatTok.TokenLength -= FormatTok.TrailingWhiteSpaceLength;
+      TrailingWhitespace = Text.size() - Text.rtrim().size();
+      FormatTok.TokenLength -= TrailingWhitespace;
     }
 
     // In case the token starts with escaped newlines, we want to
@@ -1163,6 +1166,9 @@ public:
 
     FormatTok.WhitespaceRange = SourceRange(
         WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
+    FormatTok.TokenText = StringRef(
+        SourceMgr.getCharacterData(FormatTok.getStartOfNonWhitespace()),
+        FormatTok.TokenLength);
     return FormatTok;
   }
 
@@ -1171,6 +1177,7 @@ public:
 private:
   FormatToken FormatTok;
   bool GreaterStashed;
+  unsigned TrailingWhitespace;
   Lexer &Lex;
   SourceManager &SourceMgr;
   IdentifierTable IdentTable;

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=182738&r1=182737&r2=182738&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.h Mon May 27 10:23:34 2013
@@ -28,9 +28,8 @@ namespace format {
 /// whitespace characters preceeding it.
 struct FormatToken {
   FormatToken()
-      : NewlinesBefore(0), HasUnescapedNewline(false),
-        LastNewlineOffset(0), TokenLength(0), IsFirst(false),
-        MustBreakBefore(false), TrailingWhiteSpaceLength(0) {}
+      : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0),
+        TokenLength(0), IsFirst(false), MustBreakBefore(false) {}
 
   /// \brief The \c Token.
   Token Tok;
@@ -66,9 +65,6 @@ struct FormatToken {
   /// before the token.
   bool MustBreakBefore;
 
-  /// \brief Number of characters of trailing whitespace.
-  unsigned TrailingWhiteSpaceLength;
-
   /// \brief Returns actual token start location without leading escaped
   /// newlines and whitespace.
   ///
@@ -77,6 +73,12 @@ struct FormatToken {
   SourceLocation getStartOfNonWhitespace() const {
     return WhitespaceRange.getEnd();
   }
+
+  /// \brief The raw text of the token.
+  ///
+  /// Contains the raw token text without leading whitespace and without leading
+  /// escaped newlines.
+  StringRef TokenText;
 };
 
 /// \brief An unwrapped line is a sequence of \c Token, that we would like to

Modified: cfe/trunk/lib/Format/WhitespaceManager.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/WhitespaceManager.cpp?rev=182738&r1=182737&r2=182738&view=diff
==============================================================================
--- cfe/trunk/lib/Format/WhitespaceManager.cpp (original)
+++ cfe/trunk/lib/Format/WhitespaceManager.cpp Mon May 27 10:23:34 2013
@@ -46,19 +46,6 @@ void WhitespaceManager::replaceWhitespac
       true, Tok.FormatTok.WhitespaceRange,
       Spaces, StartOfTokenColumn, Newlines, "", "", Tok.FormatTok.Tok.getKind(),
       InPPDirective && !Tok.FormatTok.IsFirst));
-
-  // Align line comments if they are trailing or if they continue other
-  // trailing comments.
-  // FIXME: Pull this out and generalize so it works the same way in broken
-  // comments and unbroken comments with trailing whitespace.
-  if (Tok.isTrailingComment()) {
-    SourceLocation TokenEndLoc = Tok.FormatTok.getStartOfNonWhitespace()
-        .getLocWithOffset(Tok.FormatTok.TokenLength);
-    // Remove the comment's trailing whitespace.
-    if (Tok.FormatTok.TrailingWhiteSpaceLength != 0)
-      Replaces.insert(tooling::Replacement(
-          SourceMgr, TokenEndLoc, Tok.FormatTok.TrailingWhiteSpaceLength, ""));
-  }
 }
 
 void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
@@ -86,12 +73,6 @@ void WhitespaceManager::breakToken(const
       tok::unknown, InPPDirective && !Tok.IsFirst));
 }
 
-void WhitespaceManager::addReplacement(const SourceLocation &SourceLoc,
-                                       unsigned ReplaceChars, StringRef Text) {
-  Replaces.insert(
-      tooling::Replacement(SourceMgr, SourceLoc, ReplaceChars, Text));
-}
-
 const tooling::Replacements &WhitespaceManager::generateReplacements() {
   if (Changes.empty())
     return Replaces;

Modified: cfe/trunk/lib/Format/WhitespaceManager.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/WhitespaceManager.h?rev=182738&r1=182737&r2=182738&view=diff
==============================================================================
--- cfe/trunk/lib/Format/WhitespaceManager.h (original)
+++ cfe/trunk/lib/Format/WhitespaceManager.h Mon May 27 10:23:34 2013
@@ -67,14 +67,6 @@ public:
   /// \brief Returns all the \c Replacements created during formatting.
   const tooling::Replacements &generateReplacements();
 
-  /// \brief Replaces \p ReplaceChars after \p SourceLoc with \p Text.
-  ///
-  /// FIXME: This is currently used to align comments outside of the \c
-  /// WhitespaceManager. Once this has been moved inside, get rid of this
-  /// method.
-  void addReplacement(const SourceLocation &SourceLoc, unsigned ReplaceChars,
-                      StringRef Text);
-
 private:
   /// \brief Represents a change before a token, a break inside a token,
   /// or the layout of an unchanged token (or whitespace within).

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=182738&r1=182737&r2=182738&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Mon May 27 10:23:34 2013
@@ -766,8 +766,8 @@ TEST_F(FormatTest, AlignsMultiLineCommen
                    "           1.1.1. to keep the formatting.\n"
                    "   */"));
   EXPECT_EQ("/*\n"
-            " Don't try to outdent if there's not enough inentation.\n"
-            " */",
+            "Don't try to outdent if there's not enough inentation.\n"
+            "*/",
             format("  /*\n"
                    " Don't try to outdent if there's not enough inentation.\n"
                    " */"));
@@ -808,6 +808,65 @@ TEST_F(FormatTest, SplitsLongCxxComments
             format("// A comment before a macro definition\n"
                    "#define a b",
                    getLLVMStyleWithColumns(20)));
+
+  EXPECT_EQ("/* A comment before\n"
+            " * a macro\n"
+            " * definition */\n"
+            "#define a b",
+            format("/* A comment before a macro definition */\n"
+                   "#define a b",
+                   getLLVMStyleWithColumns(20)));
+
+  EXPECT_EQ("/* some comment\n"
+            "     *   a comment\n"
+            "* that we break\n"
+            " * another comment\n"
+            "* we have to break\n"
+            "* a left comment\n"
+            " */",
+            format("  /* some comment\n"
+                   "       *   a comment that we break\n"
+                   "   * another comment we have to break\n"
+                   "* a left comment\n"
+                   "   */",
+                   getLLVMStyleWithColumns(20)));
+
+  EXPECT_EQ("/*\n"
+            "\n"
+            "\n"
+            "    */\n",
+            format("  /*       \n"
+                   "      \n"
+                   "               \n"
+                   "      */\n"));
+}
+
+TEST_F(FormatTest, MultiLineCommentsInDefines) {
+  // FIXME: The line breaks are still suboptimal (current guess
+  // is that this is due to the token length being misused), but
+  // the comment handling is correct.
+  EXPECT_EQ("#define A(      \\\n"
+            "    x) /*       \\\n"
+            "a comment       \\\n"
+            "inside */       \\\n"
+            "  f();",
+            format("#define A(x) /* \\\n"
+                   "  a comment     \\\n"
+                   "  inside */     \\\n"
+                   "  f();",
+                   getLLVMStyleWithColumns(17)));
+  EXPECT_EQ("#define A(x) /* \\\n"
+            "        a       \\\n"
+            "        comment \\\n"
+            "        inside  \\\n"
+            "        */      \\\n"
+            "  f();",
+            format("#define A(      \\\n"
+                   "    x) /*       \\\n"
+                   "  a comment     \\\n"
+                   "  inside */     \\\n"
+                   "  f();",
+                   getLLVMStyleWithColumns(17)));
 }
 
 TEST_F(FormatTest, ParsesCommentsAdjacentToPPDirectives) {
@@ -928,7 +987,8 @@ TEST_F(FormatTest, SplitsLongLinesInComm
                    "    */", getLLVMStyleWithColumns(20)));
   EXPECT_EQ("{\n"
             "  if (something) /* This is a\n"
-            "long comment */\n"
+            "                    long\n"
+            "                    comment */\n"
             "    ;\n"
             "}",
             format("{\n"
@@ -4435,6 +4495,35 @@ TEST_F(FormatTest, ConfigurableUseOfTab)
                "\t\t    parameter2); \\\n"
                "\t}",
                Tab);
+  EXPECT_EQ("/*\n"
+            "\t      a\t\tcomment\n"
+            "\t      in multiple lines\n"
+            "       */",
+            format("   /*\t \t \n"
+                   " \t \t a\t\tcomment\t \t\n"
+                   " \t \t in multiple lines\t\n"
+                   " \t  */",
+                   Tab));
+  Tab.UseTab = false;
+  // FIXME: Change this test to a different tab size than
+  // 8 once configurable.
+  EXPECT_EQ("/*\n"
+            "              a\t\tcomment\n"
+            "              in multiple lines\n"
+            "       */",
+            format("   /*\t \t \n"
+                   " \t \t a\t\tcomment\t \t\n"
+                   " \t \t in multiple lines\t\n"
+                   " \t  */",
+                   Tab));
+
+  // FIXME: This is broken, as the spelling column number we
+  // get from the SourceManager counts tab as '1'.
+  // EXPECT_EQ("/* some\n"
+  //           "   comment */",
+  //          format(" \t \t /* some\n"
+  //                 " \t \t    comment */",
+  //                 Tab));
 }
 
 TEST_F(FormatTest, LinuxBraceBreaking) {