r179526 - Unified token breaking logic for strings and block comments.

Mon Apr 15 07:28:00 PDT 2013

Author: alexfh
Date: Mon Apr 15 09:28:00 2013
New Revision: 179526

URL: http://llvm.org/viewvc/llvm-project?rev=179526&view=rev
Log:
Unified token breaking logic for strings and block comments.

Summary:
Both strings and block comments are broken into lines in
breakProtrudingToken. Logic specific to strings or block comments is abstracted
in implementations of the BreakableToken interface. Among other benefits, this
change fixes the placement of backslashes after a block comment inside a
preprocessor directive (see the removed FIXMEs in the unit tests).

The code is far from polished, and some parts of it will change when support
for line comments is added.

Reviewers: klimek

Reviewed By: klimek

CC: cfe-commits

Differential Revision: http://llvm-reviews.chandlerc.com/D665

Added:
    cfe/trunk/lib/Format/BreakableToken.cpp
    cfe/trunk/lib/Format/BreakableToken.h
    cfe/trunk/lib/Format/WhitespaceManager.cpp
    cfe/trunk/lib/Format/WhitespaceManager.h
Modified:
    cfe/trunk/lib/Format/CMakeLists.txt
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/unittests/Format/FormatTest.cpp

Added: cfe/trunk/lib/Format/BreakableToken.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=179526&view=auto
==============================================================================

--- cfe/trunk/lib/Format/BreakableToken.cpp (added)
+++ cfe/trunk/lib/Format/BreakableToken.cpp Mon Apr 15 09:28:00 2013
@@ -0,0 +1,161 @@
+//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Contains implementation of BreakableToken class and classes derived
+/// from it.
+///
+//===----------------------------------------------------------------------===//
+
+#include "BreakableToken.h"
+#include <algorithm>
+
+namespace clang {
+namespace format {
+
+BreakableBlockComment::BreakableBlockComment(const SourceManager &SourceMgr,
+                                             const AnnotatedToken &Token,
+                                             unsigned StartColumn)
+    : Tok(Token.FormatTok), StartColumn(StartColumn) {
+
+  SourceLocation TokenLoc = Tok.Tok.getLocation();
+  TokenText = StringRef(SourceMgr.getCharacterData(TokenLoc), Tok.TokenLength);
+  assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
+
+  OriginalStartColumn = SourceMgr.getSpellingColumnNumber(TokenLoc) - 1;
+
+  TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
+
+  NeedsStar = true;
+  CommonPrefixLength = UINT_MAX;
+  if (Lines.size() == 1) {
+    if (Token.Parent == 0) {
+      // Standalone block comments will be aligned and prefixed with *s.
+      CommonPrefixLength = OriginalStartColumn + 1;
+    } else {
+      // Trailing comments can start on arbitrary column, and available
+      // horizontal space can be too small to align consecutive lines with
+      // the first one. We could, probably, align them to current
+      // indentation level, but now we just wrap them without indentation
+      // and stars.
+      CommonPrefixLength = 0;
+      NeedsStar = false;
+    }
+  } else {
+    for (size_t i = 1; i < Lines.size(); ++i) {
+      size_t FirstNonWhitespace = Lines[i].find_first_not_of(" ");
+      if (FirstNonWhitespace != StringRef::npos) {
+        NeedsStar = NeedsStar && (Lines[i][FirstNonWhitespace] == '*');
+        CommonPrefixLength =
+            std::min<unsigned>(CommonPrefixLength, FirstNonWhitespace);
+      }
+    }
+  }
+  if (CommonPrefixLength == UINT_MAX)
+    CommonPrefixLength = 0;
+
+  IndentAtLineBreak =
+      std::max<int>(StartColumn - OriginalStartColumn + CommonPrefixLength, 0);
+}
+
+void BreakableBlockComment::alignLines(WhitespaceManager &Whitespaces) {
+  SourceLocation TokenLoc = Tok.Tok.getLocation();
+  int IndentDelta = StartColumn - OriginalStartColumn;
+  if (IndentDelta > 0) {
+    std::string WhiteSpace(IndentDelta, ' ');
+    for (size_t i = 1; i < Lines.size(); ++i) {
+      Whitespaces.addReplacement(
+          TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()), 0,
+          WhiteSpace);
+    }
+  } else if (IndentDelta < 0) {
+    std::string WhiteSpace(-IndentDelta, ' ');
+    // Check that the line is indented enough.
+    for (size_t i = 1; i < Lines.size(); ++i) {
+      if (!Lines[i].startswith(WhiteSpace))
+        return;
+    }
+    for (size_t i = 1; i < Lines.size(); ++i) {
+      Whitespaces.addReplacement(
+          TokenLoc.getLocWithOffset(Lines[i].data() - TokenText.data()),
+          -IndentDelta, "");
+    }
+  }
+
+  for (unsigned i = 1; i < Lines.size(); ++i)
+    Lines[i] = Lines[i].substr(CommonPrefixLength + (NeedsStar ? 2 : 0));
+}
+
+BreakableToken::Split BreakableBlockComment::getSplit(unsigned LineIndex,
+                                                      unsigned TailOffset,
+                                                      unsigned ColumnLimit) {
+  StringRef Text = getLine(LineIndex).substr(TailOffset);
+  unsigned DecorationLength =
+      (TailOffset == 0 && LineIndex == 0) ? StartColumn + 2 : getPrefixLength();
+  if (ColumnLimit <= DecorationLength + 1)
+    return Split(StringRef::npos, 0);
+
+  unsigned MaxSplit = ColumnLimit - DecorationLength + 1;
+  StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
+  if (SpaceOffset == StringRef::npos ||
+      Text.find_last_not_of(' ', SpaceOffset) == StringRef::npos) {
+    SpaceOffset = Text.find(' ', MaxSplit);
+  }
+  if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
+    StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim();
+    StringRef AfterCut = Text.substr(SpaceOffset).ltrim();
+    return BreakableToken::Split(BeforeCut.size(),
+                                 AfterCut.begin() - BeforeCut.end());
+  }
+  return BreakableToken::Split(StringRef::npos, 0);
+}
+
+void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
+                                        Split Split, bool InPPDirective,
+                                        WhitespaceManager &Whitespaces) {
+  StringRef Text = getLine(LineIndex).substr(TailOffset);
+  StringRef AdditionalPrefix = NeedsStar ? "* " : "";
+  if (Text.size() == Split.first + Split.second) {
+    // For all but the last line handle trailing space separately.
+    if (LineIndex < Lines.size() - 1)
+      return;
+    // For the last line we need to break before "*/", but not to add "* ".
+    AdditionalPrefix = "";
+  }
+
+  unsigned WhitespaceStartColumn =
+      Split.first +
+      (LineIndex == 0 && TailOffset == 0 ? StartColumn + 2 : getPrefixLength());
+  unsigned BreakOffset = Text.data() - TokenText.data() + Split.first;
+  unsigned CharsToRemove = Split.second;
+  Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", AdditionalPrefix,
+                         InPPDirective, IndentAtLineBreak,
+                         WhitespaceStartColumn);
+}
+
+void BreakableBlockComment::trimLine(unsigned LineIndex, unsigned TailOffset,
+                                     unsigned InPPDirective,
+                                     WhitespaceManager &Whitespaces) {
+  if (LineIndex == Lines.size() - 1)
+    return;
+  StringRef Text = Lines[LineIndex].substr(TailOffset);
+  if (!Text.endswith(" ") && !InPPDirective)
+    return;
+
+  StringRef TrimmedLine = Text.rtrim();
+  unsigned WhitespaceStartColumn =
+      getLineLengthAfterSplit(LineIndex, TailOffset);
+  unsigned BreakOffset = TrimmedLine.end() - TokenText.data();
+  unsigned CharsToRemove = Text.size() - TrimmedLine.size() + 1;
+  Whitespaces.breakToken(Tok, BreakOffset, CharsToRemove, "", "", InPPDirective,
+                         0, WhitespaceStartColumn);
+}
+
+} // namespace format
+} // namespace clang

Added: cfe/trunk/lib/Format/BreakableToken.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.h?rev=179526&view=auto
==============================================================================
--- cfe/trunk/lib/Format/BreakableToken.h (added)
+++ cfe/trunk/lib/Format/BreakableToken.h Mon Apr 15 09:28:00 2013
@@ -0,0 +1,226 @@
+//===--- BreakableToken.h - Format C++ code -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Declares BreakableToken, BreakableStringLiteral, and
+/// BreakableBlockComment classes, that contain token type-specific logic to
+/// break long lines in tokens.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
+#define LLVM_CLANG_FORMAT_BREAKABLETOKEN_H
+
+#include "TokenAnnotator.h"
+#include "WhitespaceManager.h"
+#include <utility>
+
+namespace clang {
+namespace format {
+
+class BreakableToken {
+public:
+  virtual ~BreakableToken() {}
+  virtual unsigned getLineCount() const = 0;
+  virtual unsigned getLineSize(unsigned Index) = 0;
+  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
+                                           unsigned TailOffset) = 0;
+  virtual unsigned getPrefixLength() = 0;
+  virtual unsigned getSuffixLength(unsigned LineIndex) = 0;
+
+  // Contains starting character index and length of split.
+  typedef std::pair<StringRef::size_type, unsigned> Split;
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit) = 0;
+  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                           bool InPPDirective,
+                           WhitespaceManager &Whitespaces) = 0;
+  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
+                        unsigned InPPDirective,
+                        WhitespaceManager &Whitespaces) = 0;
+};
+
+class BreakableStringLiteral : public BreakableToken {
+public:
+  BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn)
+      : Tok(Tok), StartColumn(StartColumn) {}
+
+  virtual unsigned getLineCount() const { return 1; }
+
+  virtual unsigned getLineSize(unsigned Index) {
+    return Tok.TokenLength - 2; // Should be in sync with getLine
+  }
+
+  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
+                                           unsigned TailOffset) {
+    return getPrefixLength() + getLine(LineIndex).size() - TailOffset +
+           getSuffixLength(LineIndex);
+  }
+
+  virtual unsigned getPrefixLength() { return StartColumn + 1; }
+
+  virtual unsigned getSuffixLength(unsigned LineIndex) { return 1; }
+
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit) {
+    StringRef Text = getLine(LineIndex).substr(TailOffset);
+    unsigned DecorationLength = getPrefixLength() + getSuffixLength(0);
+    if (ColumnLimit <= DecorationLength)
+      return Split(StringRef::npos, 0);
+    unsigned MaxSplit = ColumnLimit - DecorationLength;
+    assert(MaxSplit < Text.size());
+    StringRef::size_type SpaceOffset = Text.rfind(' ', MaxSplit);
+    if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
+      return Split(SpaceOffset + 1, 0);
+    StringRef::size_type SlashOffset = Text.rfind('/', MaxSplit);
+    if (SlashOffset != StringRef::npos && SlashOffset != 0)
+      return Split(SlashOffset + 1, 0);
+    StringRef::size_type SplitPoint = getStartOfCharacter(Text, MaxSplit);
+    if (SplitPoint != StringRef::npos && SplitPoint > 1)
+      // Do not split at 0.
+      return Split(SplitPoint, 0);
+    return Split(StringRef::npos, 0);
+  }
+
+  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                           bool InPPDirective, WhitespaceManager &Whitespaces) {
+    unsigned WhitespaceStartColumn = StartColumn + Split.first + 2;
+    Whitespaces.breakToken(Tok, TailOffset + Split.first + 1, Split.second,
+                           "\"", "\"", InPPDirective, StartColumn,
+                           WhitespaceStartColumn);
+  }
+
+  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
+                        unsigned InPPDirective,
+                        WhitespaceManager &Whitespaces) {}
+
+private:
+  StringRef getLine(unsigned Index) {
+    // Get string without quotes.
+    // FIXME: Handle string prefixes.
+    return StringRef(Tok.Tok.getLiteralData() + 1, Tok.TokenLength - 2);
+  }
+
+  static StringRef::size_type getStartOfCharacter(StringRef Text,
+                                                  StringRef::size_type Offset) {
+    StringRef::size_type NextEscape = Text.find('\\');
+    while (NextEscape != StringRef::npos && NextEscape < Offset) {
+      StringRef::size_type SequenceLength =
+          getEscapeSequenceLength(Text.substr(NextEscape));
+      if (Offset < NextEscape + SequenceLength)
+        return NextEscape;
+      NextEscape = Text.find('\\', NextEscape + SequenceLength);
+    }
+    return Offset;
+  }
+
+  static unsigned getEscapeSequenceLength(StringRef Text) {
+    assert(Text[0] == '\\');
+    if (Text.size() < 2)
+      return 1;
+
+    switch (Text[1]) {
+    case 'u':
+      return 6;
+    case 'U':
+      return 10;
+    case 'x':
+      return getHexLength(Text);
+    default:
+      if (Text[1] >= '0' && Text[1] <= '7')
+        return getOctalLength(Text);
+      return 2;
+    }
+  }
+
+  static unsigned getHexLength(StringRef Text) {
+    unsigned I = 2; // Point after '\x'.
+    while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
+                               (Text[I] >= 'a' && Text[I] <= 'f') ||
+                               (Text[I] >= 'A' && Text[I] <= 'F'))) {
+      ++I;
+    }
+    return I;
+  }
+
+  static unsigned getOctalLength(StringRef Text) {
+    unsigned I = 1;
+    while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
+      ++I;
+    }
+    return I;
+  }
+
+  const FormatToken &Tok;
+  unsigned StartColumn;
+};
+
+class BreakableBlockComment : public BreakableToken {
+public:
+  BreakableBlockComment(const SourceManager &SourceMgr,
+                        const AnnotatedToken &Token, unsigned StartColumn);
+
+  void alignLines(WhitespaceManager &Whitespaces);
+
+  virtual unsigned getLineCount() const { return Lines.size(); }
+
+  virtual unsigned getLineSize(unsigned Index) {
+    return getLine(Index).size();
+  }
+
+  virtual unsigned getLineLengthAfterSplit(unsigned LineIndex,
+                                           unsigned TailOffset) {
+    unsigned ContentStartColumn = getPrefixLength();
+    if (TailOffset == 0 && LineIndex == 0)
+      ContentStartColumn = StartColumn + 2;
+    return ContentStartColumn + getLine(LineIndex).size() - TailOffset +
+           getSuffixLength(LineIndex);
+  }
+
+  virtual unsigned getPrefixLength() {
+    return IndentAtLineBreak + (NeedsStar ? 2 : 0);
+  }
+
+  virtual unsigned getSuffixLength(unsigned LineIndex) {
+    if (LineIndex + 1 < Lines.size())
+      return 0;
+    return 2;
+  }
+
+  virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
+                         unsigned ColumnLimit);
+
+  virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
+                           bool InPPDirective, WhitespaceManager &Whitespaces);
+
+  virtual void trimLine(unsigned LineIndex, unsigned TailOffset,
+                        unsigned InPPDirective, WhitespaceManager &Whitespaces);
+
+private:
+  // Get comment lines without /* */, common prefix and trailing whitespace.
+  // Last line is not trimmed, as it is terminated by */, so its trailing
+  // whitespace is not really trailing.
+  StringRef getLine(unsigned Index) {
+    return Index < Lines.size() - 1 ? Lines[Index].rtrim() : Lines[Index];
+  }
+
+  const FormatToken &Tok;
+  const unsigned StartColumn;
+  StringRef TokenText;
+  unsigned OriginalStartColumn;
+  unsigned CommonPrefixLength;
+  unsigned IndentAtLineBreak;
+  bool NeedsStar;
+  SmallVector<StringRef, 16> Lines;
+};
+
+} // namespace format
+} // namespace clang
+
+#endif // LLVM_CLANG_FORMAT_BREAKABLETOKEN_H

Modified: cfe/trunk/lib/Format/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/CMakeLists.txt?rev=179526&r1=179525&r2=179526&view=diff
==============================================================================
--- cfe/trunk/lib/Format/CMakeLists.txt (original)
+++ cfe/trunk/lib/Format/CMakeLists.txt Mon Apr 15 09:28:00 2013
@@ -1,9 +1,11 @@
 set(LLVM_LINK_COMPONENTS support)
 
 add_clang_library(clangFormat
+  BreakableToken.cpp
+  Format.cpp
   TokenAnnotator.cpp
   UnwrappedLineParser.cpp
-  Format.cpp
+  WhitespaceManager.cpp
   )
 
 add_dependencies(clangFormat

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=179526&r1=179525&r2=179526&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Mon Apr 15 09:28:00 2013
@@ -15,8 +15,10 @@
 
 #define DEBUG_TYPE "format-formatter"
 
+#include "BreakableToken.h"
 #include "TokenAnnotator.h"
 #include "UnwrappedLineParser.h"
+#include "WhitespaceManager.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/OperatorPrecedence.h"
 #include "clang/Basic/SourceManager.h"
@@ -93,367 +95,6 @@ static unsigned getLengthToMatchingParen
   return End->TotalLength - Tok.TotalLength + 1;
 }
 
-static size_t
-calculateColumnLimit(const FormatStyle &Style, bool InPPDirective) {
-  // In preprocessor directives reserve two chars for trailing " \"
-  return Style.ColumnLimit - (InPPDirective ? 2 : 0);
-}
-
-/// \brief Manages the whitespaces around tokens and their replacements.
-///
-/// This includes special handling for certain constructs, e.g. the alignment of
-/// trailing line comments.
-class WhitespaceManager {
-public:
-  WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style)
-      : SourceMgr(SourceMgr), Style(Style) {}
-
-  /// \brief Replaces the whitespace in front of \p Tok. Only call once for
-  /// each \c AnnotatedToken.
-  void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
-                         unsigned Spaces, unsigned WhitespaceStartColumn) {
-    // 2+ newlines mean an empty line separating logic scopes.
-    if (NewLines >= 2)
-      alignComments();
-
-    SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation();
-    bool LineExceedsColumnLimit = Spaces + WhitespaceStartColumn +
-                                  Tok.FormatTok.TokenLength > Style.ColumnLimit;
-
-    // Align line comments if they are trailing or if they continue other
-    // trailing comments.
-    if (Tok.isTrailingComment()) {
-      // Remove the comment's trailing whitespace.
-      if (Tok.FormatTok.Tok.getLength() != Tok.FormatTok.TokenLength)
-        Replaces.insert(tooling::Replacement(
-            SourceMgr, TokenLoc.getLocWithOffset(Tok.FormatTok.TokenLength),
-            Tok.FormatTok.Tok.getLength() - Tok.FormatTok.TokenLength, ""));
-
-      // Align comment with other comments.
-      if ((Tok.Parent != NULL || !Comments.empty()) &&
-          !LineExceedsColumnLimit) {
-        StoredComment Comment;
-        Comment.Tok = Tok.FormatTok;
-        Comment.Spaces = Spaces;
-        Comment.NewLines = NewLines;
-        Comment.MinColumn =
-            NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces;
-        Comment.MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength;
-        Comment.Untouchable = false;
-        Comments.push_back(Comment);
-        return;
-      }
-    }
-
-    // If this line does not have a trailing comment, align the stored comments.
-    if (Tok.Children.empty() && !Tok.isTrailingComment())
-      alignComments();
-
-    if (Tok.Type == TT_BlockComment) {
-      indentBlockComment(Tok, Spaces, WhitespaceStartColumn, NewLines, false);
-    } else if (Tok.Type == TT_LineComment && LineExceedsColumnLimit) {
-      StringRef Line(SourceMgr.getCharacterData(TokenLoc),
-                     Tok.FormatTok.TokenLength);
-      int StartColumn = Spaces + (NewLines == 0 ? WhitespaceStartColumn : 0);
-      StringRef Prefix = getLineCommentPrefix(Line);
-      std::string NewPrefix = std::string(StartColumn, ' ') + Prefix.str();
-      splitLineInComment(Tok.FormatTok, Line.substr(Prefix.size()),
-                         StartColumn + Prefix.size(), NewPrefix,
-                         /*InPPDirective=*/ false,
-                         /*CommentHasMoreLines=*/ false);
-    }
-
-    storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces));
-  }
-
-  /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
-  /// backslashes to escape newlines inside a preprocessor directive.
-  ///
-  /// This function and \c replaceWhitespace have the same behavior if
-  /// \c Newlines == 0.
-  void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
-                           unsigned Spaces, unsigned WhitespaceStartColumn) {
-    if (Tok.Type == TT_BlockComment)
-      indentBlockComment(Tok, Spaces, WhitespaceStartColumn, NewLines, true);
-
-    storeReplacement(Tok.FormatTok,
-                     getNewLineText(NewLines, Spaces, WhitespaceStartColumn));
-  }
-
-  /// \brief Inserts a line break into the middle of a token.
-  ///
-  /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line
-  /// break and \p Postfix before the rest of the token starts in the next line.
-  ///
-  /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are
-  /// used to generate the correct line break.
-  void breakToken(const FormatToken &Tok, unsigned Offset,
-                  unsigned ReplaceChars, StringRef Prefix, StringRef Postfix,
-                  bool InPPDirective, unsigned Spaces,
-                  unsigned WhitespaceStartColumn) {
-    std::string NewLineText;
-    if (!InPPDirective)
-      NewLineText = getNewLineText(1, Spaces);
-    else
-      NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn);
-    std::string ReplacementText = (Prefix + NewLineText + Postfix).str();
-    SourceLocation Location = Tok.Tok.getLocation().getLocWithOffset(Offset);
-    Replaces.insert(tooling::Replacement(SourceMgr, Location, ReplaceChars,
-                                         ReplacementText));
-  }
-
-  /// \brief Returns all the \c Replacements created during formatting.
-  const tooling::Replacements &generateReplacements() {
-    alignComments();
-    return Replaces;
-  }
-
-  void addUntouchableComment(unsigned Column) {
-    StoredComment Comment;
-    Comment.MinColumn = Column;
-    Comment.MaxColumn = Column;
-    Comment.Untouchable = true;
-    Comments.push_back(Comment);
-  }
-
-private:
-  static StringRef getLineCommentPrefix(StringRef Comment) {
-    const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
-    for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i)
-      if (Comment.startswith(KnownPrefixes[i]))
-        return KnownPrefixes[i];
-    return "";
-  }
-
-  /// \brief Finds a common prefix of lines of a block comment to properly
-  /// indent (and possibly decorate with '*'s) added lines.
-  ///
-  /// The first line is ignored (it's special and starts with /*). The number of
-  /// lines should be more than one.
-  static StringRef findCommentLinesPrefix(ArrayRef<StringRef> Lines,
-                                          const char *PrefixChars = " *") {
-    assert(Lines.size() > 1);
-    StringRef Prefix(Lines[1].data(), Lines[1].find_first_not_of(PrefixChars));
-    for (size_t i = 2; i < Lines.size(); ++i) {
-      for (size_t j = 0; j < Prefix.size() && j < Lines[i].size(); ++j) {
-        if (Prefix[j] != Lines[i][j]) {
-          Prefix = Prefix.substr(0, j);
-          break;
-        }
-      }
-    }
-    return Prefix;
-  }
-
-  /// \brief Splits one line in a line or block comment, if it doesn't fit to
-  /// provided column limit. Removes trailing whitespace in each line.
-  ///
-  /// \param Line points to the line contents without leading // or /*.
-  ///
-  /// \param StartColumn is the column where the first character of Line will be
-  /// located after formatting.
-  ///
-  /// \param LinePrefix is inserted after each line break.
-  ///
-  /// When \param InPPDirective is true, each line break will be preceded by a
-  /// backslash in the last column to make line breaks inside the comment
-  /// visually consistent with line breaks outside the comment. This only makes
-  /// sense for block comments.
-  ///
-  /// When \param CommentHasMoreLines is false, no line breaks/trailing
-  /// backslashes will be inserted after it.
-  void splitLineInComment(const FormatToken &Tok, StringRef Line,
-                          size_t StartColumn, StringRef LinePrefix,
-                          bool InPPDirective, bool CommentHasMoreLines,
-                          const char *WhiteSpaceChars = " ") {
-    size_t ColumnLimit = calculateColumnLimit(Style, InPPDirective);
-    const char *TokenStart = SourceMgr.getCharacterData(Tok.Tok.getLocation());
-
-    StringRef TrimmedLine = Line.rtrim();
-    int TrailingSpaceLength = Line.size() - TrimmedLine.size();
-
-    // Don't touch leading whitespace.
-    Line = TrimmedLine.ltrim();
-    StartColumn += TrimmedLine.size() - Line.size();
-
-    while (Line.size() + StartColumn > ColumnLimit) {
-      // Try to break at the last whitespace before the column limit.
-      size_t SpacePos =
-          Line.find_last_of(WhiteSpaceChars, ColumnLimit - StartColumn + 1);
-      if (SpacePos == StringRef::npos) {
-        // Try to find any whitespace in the line.
-        SpacePos = Line.find_first_of(WhiteSpaceChars);
-        if (SpacePos == StringRef::npos) // No whitespace found, give up.
-          break;
-      }
-
-      StringRef NextCut = Line.substr(0, SpacePos).rtrim();
-      StringRef RemainingLine = Line.substr(SpacePos).ltrim();
-      if (RemainingLine.empty())
-        break;
-
-      if (RemainingLine == "*/" && LinePrefix.endswith("* "))
-        LinePrefix = LinePrefix.substr(0, LinePrefix.size() - 2);
-
-      Line = RemainingLine;
-
-      size_t ReplaceChars = Line.begin() - NextCut.end();
-      breakToken(Tok, NextCut.end() - TokenStart, ReplaceChars, "", LinePrefix,
-                 InPPDirective, 0, NextCut.size() + StartColumn);
-      StartColumn = LinePrefix.size();
-    }
-
-    if (TrailingSpaceLength > 0 || (InPPDirective && CommentHasMoreLines)) {
-      // Remove trailing whitespace/insert backslash. + 1 is for \n
-      breakToken(Tok, Line.end() - TokenStart, TrailingSpaceLength + 1, "", "",
-                 InPPDirective, 0, Line.size() + StartColumn);
-    }
-  }
-
-  /// \brief Changes indentation of all lines in a block comment by Indent,
-  /// removes trailing whitespace from each line, splits lines that end up
-  /// exceeding the column limit.
-  void indentBlockComment(const AnnotatedToken &Tok, int Indent,
-                          int WhitespaceStartColumn, int NewLines,
-                          bool InPPDirective) {
-    assert(Tok.Type == TT_BlockComment);
-    int StartColumn = Indent + (NewLines == 0 ? WhitespaceStartColumn : 0);
-    const SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation();
-    const int CurrentIndent = SourceMgr.getSpellingColumnNumber(TokenLoc) - 1;
-    const int IndentDelta = Indent - CurrentIndent;
-    const StringRef Text(SourceMgr.getCharacterData(TokenLoc),
-                         Tok.FormatTok.TokenLength);
-    assert(Text.startswith("/*") && Text.endswith("*/"));
-
-    SmallVector<StringRef, 16> Lines;
-    Text.split(Lines, "\n");
-
-    if (IndentDelta > 0) {
-      std::string WhiteSpace(IndentDelta, ' ');
-      for (size_t i = 1; i < Lines.size(); ++i) {
-        Replaces.insert(tooling::Replacement(
-            SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()),
-            0, WhiteSpace));
-      }
-    } else if (IndentDelta < 0) {
-      std::string WhiteSpace(-IndentDelta, ' ');
-      // Check that the line is indented enough.
-      for (size_t i = 1; i < Lines.size(); ++i) {
-        if (!Lines[i].startswith(WhiteSpace))
-          return;
-      }
-      for (size_t i = 1; i < Lines.size(); ++i) {
-        Replaces.insert(tooling::Replacement(
-            SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()),
-            -IndentDelta, ""));
-      }
-    }
-
-    // Split long lines in comments.
-    size_t OldPrefixSize = 0;
-    std::string NewPrefix;
-    if (Lines.size() > 1) {
-      StringRef CurrentPrefix = findCommentLinesPrefix(Lines);
-      OldPrefixSize = CurrentPrefix.size();
-      NewPrefix = (IndentDelta < 0)
-                  ? CurrentPrefix.substr(-IndentDelta).str()
-                  : std::string(IndentDelta, ' ') + CurrentPrefix.str();
-      if (CurrentPrefix.endswith("*")) {
-        NewPrefix += " ";
-        ++OldPrefixSize;
-      }
-    } else if (Tok.Parent == 0) {
-      NewPrefix = std::string(StartColumn, ' ') + " * ";
-    }
-
-    StartColumn += 2;
-    for (size_t i = 0; i < Lines.size(); ++i) {
-      StringRef Line = Lines[i].substr(i == 0 ? 2 : OldPrefixSize);
-      splitLineInComment(Tok.FormatTok, Line, StartColumn, NewPrefix,
-                         InPPDirective, i != Lines.size() - 1);
-      StartColumn = NewPrefix.size();
-    }
-  }
-
-  std::string getNewLineText(unsigned NewLines, unsigned Spaces) {
-    return std::string(NewLines, '\n') + std::string(Spaces, ' ');
-  }
-
-  std::string getNewLineText(unsigned NewLines, unsigned Spaces,
-                             unsigned WhitespaceStartColumn) {
-    std::string NewLineText;
-    if (NewLines > 0) {
-      unsigned Offset =
-          std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn);
-      for (unsigned i = 0; i < NewLines; ++i) {
-        NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
-        NewLineText += "\\\n";
-        Offset = 0;
-      }
-    }
-    return NewLineText + std::string(Spaces, ' ');
-  }
-
-  /// \brief Structure to store a comment for later layout and alignment.
-  struct StoredComment {
-    FormatToken Tok;
-    unsigned MinColumn;
-    unsigned MaxColumn;
-    unsigned NewLines;
-    unsigned Spaces;
-    bool Untouchable;
-  };
-  SmallVector<StoredComment, 16> Comments;
-  typedef SmallVector<StoredComment, 16>::iterator comment_iterator;
-
-  /// \brief Try to align all stashed comments.
-  void alignComments() {
-    unsigned MinColumn = 0;
-    unsigned MaxColumn = UINT_MAX;
-    comment_iterator Start = Comments.begin();
-    for (comment_iterator I = Start, E = Comments.end(); I != E; ++I) {
-      if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) {
-        alignComments(Start, I, MinColumn);
-        MinColumn = I->MinColumn;
-        MaxColumn = I->MaxColumn;
-        Start = I;
-      } else {
-        MinColumn = std::max(MinColumn, I->MinColumn);
-        MaxColumn = std::min(MaxColumn, I->MaxColumn);
-      }
-    }
-    alignComments(Start, Comments.end(), MinColumn);
-    Comments.clear();
-  }
-
-  /// \brief Put all the comments between \p I and \p E into \p Column.
-  void alignComments(comment_iterator I, comment_iterator E, unsigned Column) {
-    while (I != E) {
-      if (!I->Untouchable) {
-        unsigned Spaces = I->Spaces + Column - I->MinColumn;
-        storeReplacement(I->Tok, getNewLineText(I->NewLines, Spaces));
-      }
-      ++I;
-    }
-  }
-
-  /// \brief Stores \p Text as the replacement for the whitespace in front of
-  /// \p Tok.
-  void storeReplacement(const FormatToken &Tok, const std::string Text) {
-    // Don't create a replacement, if it does not change anything.
-    if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart),
-                  Tok.WhiteSpaceLength) == Text)
-      return;
-
-    Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart,
-                                         Tok.WhiteSpaceLength, Text));
-  }
-
-  SourceManager &SourceMgr;
-  tooling::Replacements Replaces;
-  const FormatStyle &Style;
-};
-
 class UnwrappedLineFormatter {
 public:
   UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
@@ -602,7 +243,7 @@ private:
       if (StartOfFunctionCall != Other.StartOfFunctionCall)
         return StartOfFunctionCall < Other.StartOfFunctionCall;
       if (NestedNameSpecifierContinuation !=
-              Other.NestedNameSpecifierContinuation)
+          Other.NestedNameSpecifierContinuation)
         return NestedNameSpecifierContinuation <
                Other.NestedNameSpecifierContinuation;
       if (CallContinuation != Other.CallContinuation)
@@ -647,7 +288,7 @@ private:
       if (Column != Other.Column)
         return Column < Other.Column;
       if (LineContainsContinuedForLoopSection !=
-              Other.LineContainsContinuedForLoopSection)
+          Other.LineContainsContinuedForLoopSection)
         return LineContainsContinuedForLoopSection;
       if (ParenLevel != Other.ParenLevel)
         return ParenLevel < Other.ParenLevel;
@@ -806,7 +447,7 @@ private:
       if (Current.Type == TT_ObjCSelectorName &&
           State.Stack.back().ColonPos == 0) {
         if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
-                State.Column + Spaces + Current.FormatTok.TokenLength)
+            State.Column + Spaces + Current.FormatTok.TokenLength)
           State.Stack.back().ColonPos =
               State.Stack.back().Indent + Current.LongestObjCSelectorName;
         else
@@ -970,115 +611,78 @@ private:
   /// it if possible.
   unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
                                 bool DryRun) {
-    if (Current.isNot(tok::string_literal))
+    llvm::OwningPtr<BreakableToken> Token;
+    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
+    if (Current.is(tok::string_literal)) {
+      // Only break up default narrow strings.
+      const char *LiteralData = Current.FormatTok.Tok.getLiteralData();
+      if (!LiteralData || *LiteralData != '"')
+        return 0;
+
+      Token.reset(new BreakableStringLiteral(Current.FormatTok, StartColumn));
+    } else if (Current.Type == TT_BlockComment) {
+      BreakableBlockComment *BBC =
+          new BreakableBlockComment(SourceMgr, Current, StartColumn);
+      if (!DryRun)
+        BBC->alignLines(Whitespaces);
+      Token.reset(BBC);
+    } else {
       return 0;
-    // Only break up default narrow strings.
-    const char *LiteralData = Current.FormatTok.Tok.getLiteralData();
-    if (!LiteralData || *LiteralData != '"')
+    }
+
+    if (Token->getPrefixLength() + Token->getSuffixLength(0) >
+        getColumnLimit()) {
       return 0;
+    }
 
+    bool BreakInserted = false;
     unsigned Penalty = 0;
-    unsigned TailOffset = 0;
-    unsigned TailLength = Current.FormatTok.TokenLength;
-    unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
-    unsigned OffsetFromStart = 0;
-    while (StartColumn + TailLength > getColumnLimit()) {
-      StringRef Text = StringRef(LiteralData + TailOffset, TailLength);
-      if (StartColumn + OffsetFromStart + 1 > getColumnLimit())
-        break;
-      StringRef::size_type SplitPoint = getSplitPoint(
-          Text, getColumnLimit() - StartColumn - OffsetFromStart - 1);
-      if (SplitPoint == StringRef::npos)
-        break;
-      assert(SplitPoint != 0);
-      // +2, because 'Text' starts after the opening quotes, and does not
-      // include the closing quote we need to insert.
-      unsigned WhitespaceStartColumn =
-          StartColumn + OffsetFromStart + SplitPoint + 2;
-      State.Stack.back().LastSpace = StartColumn;
+    for (unsigned LineIndex = 0; LineIndex < Token->getLineCount();
+         ++LineIndex) {
+      unsigned TokenLineSize = Token->getLineSize(LineIndex);
+      unsigned TailOffset = 0;
+      unsigned RemainingLength =
+          Token->getLineLengthAfterSplit(LineIndex, TailOffset);
+      while (RemainingLength > getColumnLimit()) {
+        unsigned DecorationLength =
+            RemainingLength - (TokenLineSize - TailOffset);
+        if (DecorationLength + 1 > getColumnLimit()) {
+          // Can't reduce line length by splitting here.
+          break;
+        }
+        BreakableToken::Split Split =
+            Token->getSplit(LineIndex, TailOffset, getColumnLimit());
+        if (Split.first == StringRef::npos)
+          break;
+        assert(Split.first != 0);
+        if (!DryRun) {
+          Token->insertBreak(LineIndex, TailOffset, Split, Line.InPPDirective,
+                             Whitespaces);
+        }
+        TailOffset += Split.first + Split.second;
+        unsigned OldRemainingLength = RemainingLength;
+        RemainingLength = Token->getLineLengthAfterSplit(LineIndex, TailOffset);
+        assert(RemainingLength < OldRemainingLength);
+        Penalty += Style.PenaltyExcessCharacter;
+        BreakInserted = true;
+      }
+      State.Column = RemainingLength;
       if (!DryRun) {
-        Whitespaces.breakToken(Current.FormatTok, TailOffset + SplitPoint + 1,
-                               0, "\"", "\"", Line.InPPDirective, StartColumn,
-                               WhitespaceStartColumn);
+        Token->trimLine(LineIndex, TailOffset, Line.InPPDirective, Whitespaces);
       }
-      TailOffset += SplitPoint + 1;
-      TailLength -= SplitPoint + 1;
-      OffsetFromStart = 1;
-      Penalty += Style.PenaltyExcessCharacter;
+    }
+
+    if (BreakInserted) {
       for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
         State.Stack[i].BreakBeforeParameter = true;
+      State.Stack.back().LastSpace = StartColumn;
     }
-    State.Column = StartColumn + TailLength;
     return Penalty;
   }
 
-  StringRef::size_type
-  getSplitPoint(StringRef Text, StringRef::size_type Offset) {
-    StringRef::size_type SpaceOffset = Text.rfind(' ', Offset);
-    if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
-      return SpaceOffset;
-    StringRef::size_type SlashOffset = Text.rfind('/', Offset);
-    if (SlashOffset != StringRef::npos && SlashOffset != 0)
-      return SlashOffset;
-    StringRef::size_type Split = getStartOfCharacter(Text, Offset);
-    if (Split != StringRef::npos && Split > 1)
-      // Do not split at 0.
-      return Split - 1;
-    return StringRef::npos;
-  }
-
-  StringRef::size_type
-  getStartOfCharacter(StringRef Text, StringRef::size_type Offset) {
-    StringRef::size_type NextEscape = Text.find('\\');
-    while (NextEscape != StringRef::npos && NextEscape < Offset) {
-      StringRef::size_type SequenceLength =
-          getEscapeSequenceLength(Text.substr(NextEscape));
-      if (Offset < NextEscape + SequenceLength)
-        return NextEscape;
-      NextEscape = Text.find('\\', NextEscape + SequenceLength);
-    }
-    return Offset;
-  }
-
-  unsigned getEscapeSequenceLength(StringRef Text) {
-    assert(Text[0] == '\\');
-    if (Text.size() < 2)
-      return 1;
-
-    switch (Text[1]) {
-    case 'u':
-      return 6;
-    case 'U':
-      return 10;
-    case 'x':
-      return getHexLength(Text);
-    default:
-      if (Text[1] >= '0' && Text[1] <= '7')
-        return getOctalLength(Text);
-      return 2;
-    }
-  }
-
-  unsigned getHexLength(StringRef Text) {
-    unsigned I = 2; // Point after '\x'.
-    while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
-                               (Text[I] >= 'a' && Text[I] <= 'f') ||
-                               (Text[I] >= 'A' && Text[I] <= 'F'))) {
-      ++I;
-    }
-    return I;
-  }
-
-  unsigned getOctalLength(StringRef Text) {
-    unsigned I = 1;
-    while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
-      ++I;
-    }
-    return I;
-  }
-
   unsigned getColumnLimit() {
-    return calculateColumnLimit(Style, Line.InPPDirective);
+    // In preprocessor directives reserve two chars for trailing " \"
+    return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0);
   }
 
   /// \brief An edge in the solution space from \c Previous->State to \c State,
@@ -1401,9 +1005,9 @@ public:
           AnnotatedLines[i].First.Children.empty())
         AnnotatedLines[i].Level = NextNoneCommentLine->Level;
       else
-        NextNoneCommentLine = AnnotatedLines[i].First.isNot(tok::r_brace)
-                                  ? &AnnotatedLines[i]
-                                  : NULL;
+        NextNoneCommentLine =
+            AnnotatedLines[i].First.isNot(tok::r_brace) ? &AnnotatedLines[i]
+                                                        : NULL;
     }
 
     std::vector<int> IndentForLevel;
@@ -1746,9 +1350,10 @@ private:
   std::vector<AnnotatedLine> AnnotatedLines;
 };
 
-tooling::Replacements
-reformat(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
-         std::vector<CharSourceRange> Ranges, DiagnosticConsumer *DiagClient) {
+tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
+                               SourceManager &SourceMgr,
+                               std::vector<CharSourceRange> Ranges,
+                               DiagnosticConsumer *DiagClient) {
   IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
   OwningPtr<DiagnosticConsumer> DiagPrinter;
   if (DiagClient == 0) {

Added: cfe/trunk/lib/Format/WhitespaceManager.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/WhitespaceManager.cpp?rev=179526&view=auto
==============================================================================
--- cfe/trunk/lib/Format/WhitespaceManager.cpp (added)
+++ cfe/trunk/lib/Format/WhitespaceManager.cpp Mon Apr 15 09:28:00 2013
@@ -0,0 +1,223 @@
+//===--- WhitespaceManager.cpp - Format C++ code --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements WhitespaceManager class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WhitespaceManager.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace clang {
+namespace format {
+
+void WhitespaceManager::replaceWhitespace(const AnnotatedToken &Tok,
+                                          unsigned NewLines, unsigned Spaces,
+                                          unsigned WhitespaceStartColumn) {
+  // 2+ newlines mean an empty line separating logical scopes.
+  if (NewLines >= 2)
+    alignComments();
+
+  SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation();
+  bool LineExceedsColumnLimit =
+      Spaces + WhitespaceStartColumn + Tok.FormatTok.TokenLength >
+      Style.ColumnLimit;
+
+  // Align line comments if they are trailing or if they continue other
+  // trailing comments.
+  if (Tok.isTrailingComment()) {
+    // Remove the comment's trailing whitespace.
+    if (Tok.FormatTok.Tok.getLength() != Tok.FormatTok.TokenLength)
+      Replaces.insert(tooling::Replacement(
+          SourceMgr, TokenLoc.getLocWithOffset(Tok.FormatTok.TokenLength),
+          Tok.FormatTok.Tok.getLength() - Tok.FormatTok.TokenLength, ""));
+
+    // Align comment with other comments.
+    if ((Tok.Parent != NULL || !Comments.empty()) && !LineExceedsColumnLimit) {
+      StoredComment Comment;
+      Comment.Tok = Tok.FormatTok;
+      Comment.Spaces = Spaces;
+      Comment.NewLines = NewLines;
+      Comment.MinColumn =
+          NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces;
+      Comment.MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength;
+      Comment.Untouchable = false;
+      Comments.push_back(Comment);
+      return;
+    }
+  }
+
+  // If this line does not have a trailing comment, align the stored comments.
+  if (Tok.Children.empty() && !Tok.isTrailingComment())
+    alignComments();
+
+  if (Tok.Type == TT_LineComment && LineExceedsColumnLimit) {
+    StringRef Line(SourceMgr.getCharacterData(TokenLoc),
+                   Tok.FormatTok.TokenLength);
+    int StartColumn = Spaces + (NewLines == 0 ? WhitespaceStartColumn : 0);
+    StringRef Prefix = getLineCommentPrefix(Line);
+    std::string NewPrefix = std::string(StartColumn, ' ') + Prefix.str();
+    splitLineComment(Tok.FormatTok, Line.substr(Prefix.size()),
+                     StartColumn + Prefix.size(), NewPrefix);
+  }
+
+  storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces));
+}
+
+void WhitespaceManager::replacePPWhitespace(const AnnotatedToken &Tok,
+                                            unsigned NewLines, unsigned Spaces,
+                                            unsigned WhitespaceStartColumn) {
+  storeReplacement(Tok.FormatTok,
+                   getNewLineText(NewLines, Spaces, WhitespaceStartColumn));
+}
+
+void WhitespaceManager::breakToken(const FormatToken &Tok, unsigned Offset,
+                                   unsigned ReplaceChars, StringRef Prefix,
+                                   StringRef Postfix, bool InPPDirective,
+                                   unsigned Spaces,
+                                   unsigned WhitespaceStartColumn) {
+  std::string NewLineText;
+  if (!InPPDirective)
+    NewLineText = getNewLineText(1, Spaces);
+  else
+    NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn);
+  std::string ReplacementText = (Prefix + NewLineText + Postfix).str();
+  SourceLocation Location = Tok.Tok.getLocation().getLocWithOffset(Offset);
+  Replaces.insert(
+      tooling::Replacement(SourceMgr, Location, ReplaceChars, ReplacementText));
+}
+
+const tooling::Replacements &WhitespaceManager::generateReplacements() {
+  alignComments();
+  return Replaces;
+}
+
+void WhitespaceManager::addReplacement(const SourceLocation &SourceLoc,
+                                       unsigned ReplaceChars, StringRef Text) {
+  Replaces.insert(
+      tooling::Replacement(SourceMgr, SourceLoc, ReplaceChars, Text));
+}
+
+void WhitespaceManager::addUntouchableComment(unsigned Column) {
+  StoredComment Comment;
+  Comment.MinColumn = Column;
+  Comment.MaxColumn = Column;
+  Comment.Untouchable = true;
+  Comments.push_back(Comment);
+}
+
+StringRef WhitespaceManager::getLineCommentPrefix(StringRef Comment) {
+  const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
+  for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i)
+    if (Comment.startswith(KnownPrefixes[i]))
+      return KnownPrefixes[i];
+  return "";
+}
+
+void
+WhitespaceManager::splitLineComment(const FormatToken &Tok, StringRef Line,
+                                    size_t StartColumn, StringRef LinePrefix,
+                                    const char *WhiteSpaceChars /*= " "*/) {
+  const char *TokenStart = SourceMgr.getCharacterData(Tok.Tok.getLocation());
+
+  StringRef TrimmedLine = Line.rtrim();
+  // Don't touch leading whitespace.
+  Line = TrimmedLine.ltrim();
+  StartColumn += TrimmedLine.size() - Line.size();
+
+  while (Line.size() + StartColumn > Style.ColumnLimit) {
+    // Try to break at the last whitespace before the column limit.
+    size_t SpacePos =
+        Line.find_last_of(WhiteSpaceChars, Style.ColumnLimit - StartColumn + 1);
+    if (SpacePos == StringRef::npos) {
+      // Try to find any whitespace in the line.
+      SpacePos = Line.find_first_of(WhiteSpaceChars);
+      if (SpacePos == StringRef::npos) // No whitespace found, give up.
+        break;
+    }
+
+    StringRef NextCut = Line.substr(0, SpacePos).rtrim();
+    StringRef RemainingLine = Line.substr(SpacePos).ltrim();
+    if (RemainingLine.empty())
+      break;
+
+    Line = RemainingLine;
+
+    size_t ReplaceChars = Line.begin() - NextCut.end();
+    breakToken(Tok, NextCut.end() - TokenStart, ReplaceChars, "", LinePrefix,
+               false, 0, 0);
+    StartColumn = LinePrefix.size();
+  }
+}
+
+std::string WhitespaceManager::getNewLineText(unsigned NewLines,
+                                              unsigned Spaces) {
+  return std::string(NewLines, '\n') + std::string(Spaces, ' ');
+}
+
+std::string WhitespaceManager::getNewLineText(unsigned NewLines,
+                                              unsigned Spaces,
+                                              unsigned WhitespaceStartColumn) {
+  std::string NewLineText;
+  if (NewLines > 0) {
+    unsigned Offset =
+        std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn);
+    for (unsigned i = 0; i < NewLines; ++i) {
+      NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
+      NewLineText += "\\\n";
+      Offset = 0;
+    }
+  }
+  return NewLineText + std::string(Spaces, ' ');
+}
+
+void WhitespaceManager::alignComments() {
+  unsigned MinColumn = 0;
+  unsigned MaxColumn = UINT_MAX;
+  comment_iterator Start = Comments.begin();
+  for (comment_iterator I = Start, E = Comments.end(); I != E; ++I) {
+    if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) {
+      alignComments(Start, I, MinColumn);
+      MinColumn = I->MinColumn;
+      MaxColumn = I->MaxColumn;
+      Start = I;
+    } else {
+      MinColumn = std::max(MinColumn, I->MinColumn);
+      MaxColumn = std::min(MaxColumn, I->MaxColumn);
+    }
+  }
+  alignComments(Start, Comments.end(), MinColumn);
+  Comments.clear();
+}
+
+void WhitespaceManager::alignComments(comment_iterator I, comment_iterator E,
+                                      unsigned Column) {
+  while (I != E) {
+    if (!I->Untouchable) {
+      unsigned Spaces = I->Spaces + Column - I->MinColumn;
+      storeReplacement(I->Tok, getNewLineText(I->NewLines, Spaces));
+    }
+    ++I;
+  }
+}
+
+void WhitespaceManager::storeReplacement(const FormatToken &Tok,
+                                         const std::string Text) {
+  // Don't create a replacement, if it does not change anything.
+  if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart),
+                Tok.WhiteSpaceLength) == Text)
+    return;
+
+  Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart,
+                                       Tok.WhiteSpaceLength, Text));
+}
+
+} // namespace format
+} // namespace clang

Added: cfe/trunk/lib/Format/WhitespaceManager.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/WhitespaceManager.h?rev=179526&view=auto
==============================================================================
--- cfe/trunk/lib/Format/WhitespaceManager.h (added)
+++ cfe/trunk/lib/Format/WhitespaceManager.h Mon Apr 15 09:28:00 2013
@@ -0,0 +1,120 @@
+//===--- WhitespaceManager.h - Format C++ code ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief WhitespaceManager class manages whitespace around tokens and their
+/// replacements.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_WHITESPACEMANAGER_H
+#define LLVM_CLANG_FORMAT_WHITESPACEMANAGER_H
+
+#include "TokenAnnotator.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include <string>
+
+namespace clang {
+namespace format {
+
+/// \brief Manages the whitespaces around tokens and their replacements.
+///
+/// This includes special handling for certain constructs, e.g. the alignment of
+/// trailing line comments.
+class WhitespaceManager {
+public:
+  WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style)
+      : SourceMgr(SourceMgr), Style(Style) {}
+
+  /// \brief Replaces the whitespace in front of \p Tok. Only call once for
+  /// each \c AnnotatedToken.
+  void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
+                         unsigned Spaces, unsigned WhitespaceStartColumn);
+
+  /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
+  /// backslashes to escape newlines inside a preprocessor directive.
+  ///
+  /// This function and \c replaceWhitespace have the same behavior if
+  /// \p NewLines == 0.
+  void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
+                           unsigned Spaces, unsigned WhitespaceStartColumn);
+
+  /// \brief Inserts a line break into the middle of a token.
+  ///
+  /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line
+  /// break and \p Postfix before the rest of the token starts in the next line.
+  ///
+  /// \p InPPDirective, \p Spaces and \p WhitespaceStartColumn (together with
+  /// the manager's \c Style) are used to generate the correct line break.
+  void breakToken(const FormatToken &Tok, unsigned Offset,
+                  unsigned ReplaceChars, StringRef Prefix, StringRef Postfix,
+                  bool InPPDirective, unsigned Spaces,
+                  unsigned WhitespaceStartColumn);
+
+  /// \brief Returns all the \c Replacements created during formatting.
+  const tooling::Replacements &generateReplacements();
+
+  void addReplacement(const SourceLocation &SourceLoc, unsigned ReplaceChars,
+                      StringRef Text);
+
+  void addUntouchableComment(unsigned Column);
+
+private:
+  static StringRef getLineCommentPrefix(StringRef Comment);
+
+  /// \brief Splits one line of a line comment if it doesn't fit within the
+  /// style's column limit. Removes trailing whitespace in each line.
+  ///
+  /// \param Line points to the line contents without leading // or /*.
+  ///
+  /// \param StartColumn is the column where the first character of Line will be
+  /// located after formatting.
+  ///
+  /// \param LinePrefix is inserted after each line break.
+  void splitLineComment(const FormatToken &Tok, StringRef Line,
+                        size_t StartColumn, StringRef LinePrefix,
+                        const char *WhiteSpaceChars = " ");
+
+  std::string getNewLineText(unsigned NewLines, unsigned Spaces);
+
+  std::string getNewLineText(unsigned NewLines, unsigned Spaces,
+                             unsigned WhitespaceStartColumn);
+
+  /// \brief Structure to store a comment for later layout and alignment.
+  struct StoredComment {
+    FormatToken Tok;
+    unsigned MinColumn;
+    unsigned MaxColumn;
+    unsigned NewLines;
+    unsigned Spaces;
+    bool Untouchable;
+  };
+  SmallVector<StoredComment, 16> Comments;
+  typedef SmallVector<StoredComment, 16>::iterator comment_iterator;
+
+  /// \brief Try to align all stashed comments.
+  void alignComments();
+
+  /// \brief Put all the comments between \p I and \p E into \p Column.
+  void alignComments(comment_iterator I, comment_iterator E, unsigned Column);
+
+  /// \brief Stores \p Text as the replacement for the whitespace in front of
+  /// \p Tok.
+  void storeReplacement(const FormatToken &Tok, const std::string Text);
+
+  SourceManager &SourceMgr;
+  tooling::Replacements Replaces;
+  const FormatStyle &Style;
+};
+
+} // namespace format
+} // namespace clang
+
+#endif // LLVM_CLANG_FORMAT_WHITESPACEMANAGER_H

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=179526&r1=179525&r2=179526&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Mon Apr 15 09:28:00 2013
@@ -845,10 +845,7 @@ TEST_F(FormatTest, SplitsLongLinesInComm
             "   Macro comment   \\\n"
             "   with a long     \\\n"
             "   line            \\\n"
-            // FIXME: We should look at the length of the last line of the token
-            // instead of the full token's length.
-            //"  */               \\\n"
-            "   */\\\n"
+            "   */              \\\n"
             "  A + B",
             format("#define X \\\n"
                    "  /*\n"
@@ -860,10 +857,7 @@ TEST_F(FormatTest, SplitsLongLinesInComm
   EXPECT_EQ("#define X          \\\n"
             "  /* Macro comment \\\n"
             "     with a long   \\\n"
-            // FIXME: We should look at the length of the last line of the token
-            // instead of the full token's length.
-            //"   line */         \\\n"
-            "     line */\\\n"
+            "     line */       \\\n"
             "  A + B",
             format("#define X \\\n"
                    "  /* Macro comment with a long\n"
@@ -873,10 +867,7 @@ TEST_F(FormatTest, SplitsLongLinesInComm
   EXPECT_EQ("#define X          \\\n"
             "  /* Macro comment \\\n"
             "   * with a long   \\\n"
-            // FIXME: We should look at the length of the last line of the token
-            // instead of the full token's length.
-            //"   * line */       \\\n"
-            "   * line */\\\n"
+            "   * line */       \\\n"
             "  A + B",
             format("#define X \\\n"
                    "  /* Macro comment with a long  line */ \\\n"
@@ -3697,7 +3688,8 @@ TEST_F(FormatTest, BreakStringLiterals)
             "\"text\"",
             format("\"some text\"", getLLVMStyleWithColumns(7)));
   EXPECT_EQ("\"some\"\n"
-            "\" text\"",
+            "\" tex\"\n"
+            "\"t\"",
             format("\"some text\"", getLLVMStyleWithColumns(6)));
   EXPECT_EQ("\"some\"\n"
             "\" tex\"\n"
@@ -3790,7 +3782,8 @@ TEST_F(FormatTest, DoNotBreakStringLiter
             "\"000001\"",
             format("\"test\\000000000001\"", getLLVMStyleWithColumns(9)));
   EXPECT_EQ("\"test\\000\"\n"
-            "\"000000001\"",
+            "\"00000000\"\n"
+            "\"1\"",
             format("\"test\\000000000001\"", getLLVMStyleWithColumns(10)));
   EXPECT_EQ("R\"(\\x\\x00)\"\n",
             format("R\"(\\x\\x00)\"\n", getLLVMStyleWithColumns(7)));