r194090 - clang-format: Allow line merging and partial formatting of nested blocks

Daniel Jasper djasper at google.com
Tue Nov 5 11:10:03 PST 2013


Author: djasper
Date: Tue Nov  5 13:10:03 2013
New Revision: 194090

URL: http://llvm.org/viewvc/llvm-project?rev=194090&view=rev
Log:
clang-format: Allow line merging and partial formatting of nested blocks

Before, clang-format would always format entire nested blocks, which
can be unwanted e.g. for long DEBUG({...}) statements. Also
clang-format would not allow to merge lines in nested blocks (e.g. to
put "if (a) return;" on one line in Google style).

This is the first step of several refactorings mostly focussing on the
additional functionality (by reusing the "format many lines" code to
format the children of a nested block). The next steps are:
* Pull out the line merging into its own class.
* Seperate the formatting of many lines from the formatting of a single
  line (and the analysis of the solution space).

Modified:
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/lib/Format/TokenAnnotator.cpp
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=194090&r1=194089&r2=194090&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Tue Nov  5 13:10:03 2013
@@ -372,28 +372,148 @@ private:
 
 class UnwrappedLineFormatter {
 public:
-  UnwrappedLineFormatter(ContinuationIndenter *Indenter,
+  UnwrappedLineFormatter(SourceManager &SourceMgr,
+                         SmallVectorImpl<CharSourceRange> &Ranges,
+                         ContinuationIndenter *Indenter,
                          WhitespaceManager *Whitespaces,
-                         const FormatStyle &Style, const AnnotatedLine &Line)
-      : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), Line(Line),
-        Count(0) {}
+                         const FormatStyle &Style)
+      : SourceMgr(SourceMgr), Ranges(Ranges), Indenter(Indenter),
+        Whitespaces(Whitespaces), Style(Style) {}
+
+  unsigned format(SmallVectorImpl<AnnotatedLine *> &Lines, bool DryRun,
+                  int AdditionalIndent = 0) {
+    assert(!Lines.empty());
+    unsigned Penalty = 0;
+    std::vector<int> IndentForLevel;
+    for (unsigned i = 0, e = Lines[0]->Level; i != e; ++i)
+      IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
+    bool PreviousLineWasTouched = false;
+    const AnnotatedLine *PreviousLine = NULL;
+    bool FormatPPDirective = false;
+    for (SmallVectorImpl<AnnotatedLine *>::iterator I = Lines.begin(),
+                                                    E = Lines.end();
+         I != E; ++I) {
+      const AnnotatedLine &TheLine = **I;
+      const FormatToken *FirstTok = TheLine.First;
+      int Offset = getIndentOffset(*FirstTok);
+
+      // Check whether this line is part of a formatted preprocessor directive.
+      if (FirstTok->HasUnescapedNewline)
+        FormatPPDirective = false;
+      if (!FormatPPDirective && TheLine.InPPDirective &&
+          (touchesLine(TheLine) || touchesPPDirective(I + 1, E)))
+        FormatPPDirective = true;
+
+      // Determine indent and try to merge multiple unwrapped lines.
+      while (IndentForLevel.size() <= TheLine.Level)
+        IndentForLevel.push_back(-1);
+      IndentForLevel.resize(TheLine.Level + 1);
+      unsigned Indent = getIndent(IndentForLevel, TheLine.Level);
+      if (static_cast<int>(Indent) + Offset >= 0)
+        Indent += Offset;
+      if (!DryRun)
+        tryFitMultipleLinesInOne(Indent, I, E);
+
+      bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0;
+      if (TheLine.First->is(tok::eof)) {
+        if (PreviousLineWasTouched && !DryRun) {
+          unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
+          Whitespaces->replaceWhitespace(*TheLine.First, Newlines,
+                                         /*IndentLevel=*/0, /*Spaces=*/0,
+                                         /*TargetColumn=*/0);
+        }
+      } else if (TheLine.Type != LT_Invalid &&
+                 (WasMoved || FormatPPDirective || touchesLine(TheLine))) {
+        unsigned LevelIndent =
+            getIndent(IndentForLevel, TheLine.Level);
+        if (FirstTok->WhitespaceRange.isValid()) {
+          if (!DryRun)
+            formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
+                             Indent, TheLine.InPPDirective);
+        } else {
+          Indent = LevelIndent = FirstTok->OriginalColumn;
+        }
+
+        // If everything fits on a single line, just put it there.
+        unsigned ColumnLimit = Style.ColumnLimit;
+        if (I + 1 != E) {
+          AnnotatedLine *NextLine = *(I + 1);
+          if (NextLine->InPPDirective && !NextLine->First->HasUnescapedNewline)
+            ColumnLimit = getColumnLimit(TheLine.InPPDirective);
+        }
+
+        if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
+          LineState State = Indenter->getInitialState(Indent, &TheLine, DryRun);
+          while (State.NextToken != NULL)
+            Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
+        } else if (Style.ColumnLimit == 0) {
+          NoColumnLimitFormatter Formatter(Indenter);
+          if (!DryRun)
+            Formatter.format(Indent, &TheLine);
+        } else {
+          Penalty += format(TheLine, Indent, DryRun);
+        }
+
+        IndentForLevel[TheLine.Level] = LevelIndent;
+        PreviousLineWasTouched = true;
+      } else {
+        // Format the first token if necessary, and notify the WhitespaceManager
+        // about the unchanged whitespace.
+        for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
+          if (Tok == TheLine.First &&
+              (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
+            unsigned LevelIndent = Tok->OriginalColumn;
+            if (!DryRun) {
+              // Remove trailing whitespace of the previous line if it was
+              // touched.
+              if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) {
+                formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
+                                 TheLine.InPPDirective);
+              } else {
+                Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
+              }
+            }
+
+            if (static_cast<int>(LevelIndent) - Offset >= 0)
+              LevelIndent -= Offset;
+            if (Tok->isNot(tok::comment))
+              IndentForLevel[TheLine.Level] = LevelIndent;
+          } else if (!DryRun) {
+            Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
+          }
+        }
+        // If we did not reformat this unwrapped line, the column at the end of
+        // the last token is unchanged - thus, we can calculate the end of the
+        // last token.
+        PreviousLineWasTouched = false;
+      }
+      if (!DryRun) {
+        for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
+          Tok->Finalized = true;
+        }
+      }
+      PreviousLine = *I;
+    }
+    return Penalty;
+  }
 
-  /// \brief Formats an \c UnwrappedLine and returns the penalty.
+private:
+  /// \brief Formats an \c AnnotatedLine and returns the penalty.
   ///
   /// If \p DryRun is \c false, directly applies the changes.
-  unsigned format(unsigned FirstIndent, bool DryRun = false) {
+  unsigned format(const AnnotatedLine &Line, unsigned FirstIndent,
+                  bool DryRun) {
     LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
 
     // If the ObjC method declaration does not fit on a line, we should format
     // it with one arg per line.
-    if (Line.Type == LT_ObjCMethodDecl)
+    if (State.Line->Type == LT_ObjCMethodDecl)
       State.Stack.back().BreakBeforeParameter = true;
 
     // Find best solution in solution space.
     return analyzeSolutionSpace(State, DryRun);
   }
 
-private:
   /// \brief An edge in the solution space from \c Previous->State to \c State,
   /// inserting a newline dependent on the \c NewLine.
   struct StateNode {
@@ -419,6 +539,262 @@ private:
   typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
                               std::greater<QueueItem> > QueueType;
 
+  /// \brief Get the offset of the line relatively to the level.
+  ///
+  /// For example, 'public:' labels in classes are offset by 1 or 2
+  /// characters to the left from their level.
+  int getIndentOffset(const FormatToken &RootToken) {
+    if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
+      return Style.AccessModifierOffset;
+    return 0;
+  }
+
+  /// \brief Add a new line and the required indent before the first Token
+  /// of the \c UnwrappedLine if there was no structural parsing error.
+  void formatFirstToken(FormatToken &RootToken,
+                        const AnnotatedLine *PreviousLine, unsigned IndentLevel,
+                        unsigned Indent, bool InPPDirective) {
+    unsigned Newlines =
+        std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
+    // Remove empty lines before "}" where applicable.
+    if (RootToken.is(tok::r_brace) &&
+        (!RootToken.Next ||
+         (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
+      Newlines = std::min(Newlines, 1u);
+    if (Newlines == 0 && !RootToken.IsFirst)
+      Newlines = 1;
+
+    // Insert extra new line before access specifiers.
+    if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
+        RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
+      ++Newlines;
+
+    // Remove empty lines after access specifiers.
+    if (PreviousLine && PreviousLine->First->isAccessSpecifier())
+      Newlines = std::min(1u, Newlines);
+
+    Whitespaces->replaceWhitespace(
+        RootToken, Newlines, IndentLevel, Indent, Indent,
+        InPPDirective && !RootToken.HasUnescapedNewline);
+  }
+
+  /// \brief Get the indent of \p Level from \p IndentForLevel.
+  ///
+  /// \p IndentForLevel must contain the indent for the level \c l
+  /// at \p IndentForLevel[l], or a value < 0 if the indent for
+  /// that level is unknown.
+  unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
+    if (IndentForLevel[Level] != -1)
+      return IndentForLevel[Level];
+    if (Level == 0)
+      return 0;
+    return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
+  }
+
+  /// \brief Tries to merge lines into one.
+  ///
+  /// This will change \c Line and \c AnnotatedLine to contain the merged line,
+  /// if possible; note that \c I will be incremented when lines are merged.
+  void tryFitMultipleLinesInOne(unsigned Indent,
+                                SmallVectorImpl<AnnotatedLine *>::iterator &I,
+                                SmallVectorImpl<AnnotatedLine *>::iterator E) {
+    // We can never merge stuff if there are trailing line comments.
+    AnnotatedLine *TheLine = *I;
+    if (TheLine->Last->Type == TT_LineComment)
+      return;
+
+    if (Indent > Style.ColumnLimit)
+      return;
+
+    unsigned Limit = Style.ColumnLimit - Indent;
+    // If we already exceed the column limit, we set 'Limit' to 0. The different
+    // tryMerge..() functions can then decide whether to still do merging.
+    Limit = TheLine->Last->TotalLength > Limit
+                ? 0
+                : Limit - TheLine->Last->TotalLength;
+
+    if (I + 1 == E || (*(I + 1))->Type == LT_Invalid)
+      return;
+
+    if (TheLine->Last->is(tok::l_brace)) {
+      tryMergeSimpleBlock(I, E, Limit);
+    } else if (Style.AllowShortIfStatementsOnASingleLine &&
+               TheLine->First->is(tok::kw_if)) {
+      tryMergeSimpleControlStatement(I, E, Limit);
+    } else if (Style.AllowShortLoopsOnASingleLine &&
+               TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
+      tryMergeSimpleControlStatement(I, E, Limit);
+    } else if (TheLine->InPPDirective && (TheLine->First->HasUnescapedNewline ||
+                                          TheLine->First->IsFirst)) {
+      tryMergeSimplePPDirective(I, E, Limit);
+    }
+  }
+
+  void tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::iterator &I,
+                                 SmallVectorImpl<AnnotatedLine *>::iterator E,
+                                 unsigned Limit) {
+    if (Limit == 0)
+      return;
+    AnnotatedLine &Line = **I;
+    if (!(*(I + 1))->InPPDirective || (*(I + 1))->First->HasUnescapedNewline)
+      return;
+    if (I + 2 != E && (*(I + 2))->InPPDirective &&
+        !(*(I + 2))->First->HasUnescapedNewline)
+      return;
+    if (1 + (*(I + 1))->Last->TotalLength > Limit)
+      return;
+    join(Line, **(++I));
+  }
+
+  void
+  tryMergeSimpleControlStatement(SmallVectorImpl<AnnotatedLine *>::iterator &I,
+                                 SmallVectorImpl<AnnotatedLine *>::iterator E,
+                                 unsigned Limit) {
+    if (Limit == 0)
+      return;
+    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman &&
+        (*(I + 1))->First->is(tok::l_brace))
+      return;
+    if ((*(I + 1))->InPPDirective != (*I)->InPPDirective ||
+        ((*(I + 1))->InPPDirective && (*(I + 1))->First->HasUnescapedNewline))
+      return;
+    AnnotatedLine &Line = **I;
+    if (Line.Last->isNot(tok::r_paren))
+      return;
+    if (1 + (*(I + 1))->Last->TotalLength > Limit)
+      return;
+    if ((*(I + 1))->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
+                                   tok::kw_while) ||
+        (*(I + 1))->First->Type == TT_LineComment)
+      return;
+    // Only inline simple if's (no nested if or else).
+    if (I + 2 != E && Line.First->is(tok::kw_if) &&
+        (*(I + 2))->First->is(tok::kw_else))
+      return;
+    join(Line, **(++I));
+  }
+
+  void tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::iterator &I,
+                           SmallVectorImpl<AnnotatedLine *>::iterator E,
+                           unsigned Limit) {
+    // No merging if the brace already is on the next line.
+    if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
+      return;
+
+    // First, check that the current line allows merging. This is the case if
+    // we're not in a control flow statement and the last token is an opening
+    // brace.
+    AnnotatedLine &Line = **I;
+    if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
+                            tok::kw_else, tok::kw_try, tok::kw_catch,
+                            tok::kw_for,
+                            // This gets rid of all ObjC @ keywords and methods.
+                            tok::at, tok::minus, tok::plus))
+      return;
+
+    FormatToken *Tok = (*(I + 1))->First;
+    if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
+        (Tok->getNextNonComment() == NULL ||
+         Tok->getNextNonComment()->is(tok::semi))) {
+      // We merge empty blocks even if the line exceeds the column limit.
+      Tok->SpacesRequiredBefore = 0;
+      Tok->CanBreakBefore = true;
+      join(Line, **(I + 1));
+      I += 1;
+    } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
+      // Check that we still have three lines and they fit into the limit.
+      if (I + 2 == E || (*(I + 2))->Type == LT_Invalid ||
+          !nextTwoLinesFitInto(I, Limit))
+        return;
+
+      // Second, check that the next line does not contain any braces - if it
+      // does, readability declines when putting it into a single line.
+      if ((*(I + 1))->Last->Type == TT_LineComment || Tok->MustBreakBefore)
+        return;
+      do {
+        if (Tok->isOneOf(tok::l_brace, tok::r_brace))
+          return;
+        Tok = Tok->Next;
+      } while (Tok != NULL);
+
+      // Last, check that the third line contains a single closing brace.
+      Tok = (*(I + 2))->First;
+      if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
+          Tok->MustBreakBefore)
+        return;
+
+      join(Line, **(I + 1));
+      join(Line, **(I + 2));
+      I += 2;
+    }
+  }
+
+  bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::iterator I,
+                           unsigned Limit) {
+    return 1 + (*(I + 1))->Last->TotalLength + 1 +
+               (*(I + 2))->Last->TotalLength <=
+           Limit;
+  }
+
+  void join(AnnotatedLine &A, const AnnotatedLine &B) {
+    assert(!A.Last->Next);
+    assert(!B.First->Previous);
+    A.Last->Next = B.First;
+    B.First->Previous = A.Last;
+    unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
+    for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
+      Tok->TotalLength += LengthA;
+      A.Last = Tok;
+    }
+  }
+
+  unsigned getColumnLimit(bool InPPDirective) const {
+    // In preprocessor directives reserve two chars for trailing " \"
+    return Style.ColumnLimit - (InPPDirective ? 2 : 0);
+  }
+
+  bool touchesRanges(const CharSourceRange &Range) {
+    for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(),
+                                                          E = Ranges.end();
+         I != E; ++I) {
+      if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) &&
+          !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin()))
+        return true;
+    }
+    return false;
+  }
+
+  bool touchesLine(const AnnotatedLine &TheLine) {
+    const FormatToken *First = TheLine.First;
+    const FormatToken *Last = TheLine.Last;
+    CharSourceRange LineRange = CharSourceRange::getCharRange(
+        First->WhitespaceRange.getBegin().getLocWithOffset(
+            First->LastNewlineOffset),
+        Last->getStartOfNonWhitespace().getLocWithOffset(
+            Last->TokenText.size() - 1));
+    return touchesRanges(LineRange);
+  }
+
+  bool touchesPPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
+                          SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
+    for (; I != E; ++I) {
+      if ((*I)->First->HasUnescapedNewline)
+        return false;
+      if (touchesLine(**I))
+        return true;
+    }
+    return false;
+  }
+
+  bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
+    const FormatToken *First = TheLine.First;
+    CharSourceRange LineRange = CharSourceRange::getCharRange(
+        First->WhitespaceRange.getBegin(),
+        First->WhitespaceRange.getBegin().getLocWithOffset(
+            First->LastNewlineOffset));
+    return touchesRanges(LineRange);
+  }
+
   /// \brief Analyze the entire solution space starting from \p InitialState.
   ///
   /// This implements a variant of Dijkstra's algorithm on the graph that spans
@@ -430,6 +806,11 @@ private:
   unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun = false) {
     std::set<LineState> Seen;
 
+    // Increasing count of \c StateNode items we have created. This is used to
+    // create a deterministic order independent of the container.
+    unsigned Count = 0;
+    QueueType Queue;
+
     // Insert start element into queue.
     StateNode *Node =
         new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
@@ -459,9 +840,9 @@ private:
 
       FormatDecision LastFormat = Node->State.NextToken->Decision;
       if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
-        addNextStateToQueue(Penalty, Node, /*NewLine=*/false);
+        addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
       if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
-        addNextStateToQueue(Penalty, Node, /*NewLine=*/true);
+        addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
     }
 
     if (Queue.empty()) {
@@ -509,7 +890,7 @@ private:
   /// Assume the current state is \p PreviousNode and has been reached with a
   /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
   void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
-                           bool NewLine) {
+                           bool NewLine, unsigned *Count, QueueType *Queue) {
     if (NewLine && !Indenter->canBreak(PreviousNode->State))
       return;
     if (!NewLine && Indenter->mustBreak(PreviousNode->State))
@@ -522,8 +903,8 @@ private:
 
     Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
 
-    Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node));
-    ++Count;
+    Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
+    ++(*Count);
   }
 
   /// \brief If the \p State's next token is an r_brace closing a nested block,
@@ -548,7 +929,7 @@ private:
   /// break or don't break.
   bool formatChildren(LineState &State, bool NewLine, bool DryRun,
                       unsigned &Penalty) {
-    const FormatToken &Previous = *State.NextToken->Previous;
+    FormatToken &Previous = *State.NextToken->Previous;
     const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
     if (!LBrace || LBrace->isNot(tok::l_brace) ||
         LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
@@ -557,29 +938,15 @@ private:
       return true;
 
     if (NewLine) {
-      unsigned ParentIndent = State.Stack.back().Indent;
-      for (SmallVector<AnnotatedLine *, 1>::const_iterator
-               I = Previous.Children.begin(),
-               E = Previous.Children.end();
-           I != E; ++I) {
-        unsigned Indent =
-            ParentIndent + ((*I)->Level - Line.Level - 1) * Style.IndentWidth;
-        if (!DryRun) {
-          unsigned Newlines = std::min((*I)->First->NewlinesBefore,
-                                       Style.MaxEmptyLinesToKeep + 1);
-          Newlines = std::max(1u, Newlines);
-          Whitespaces->replaceWhitespace(
-              *(*I)->First, Newlines, (*I)->Level, /*Spaces=*/Indent,
-              /*StartOfTokenColumn=*/Indent, Line.InPPDirective);
-        }
-        UnwrappedLineFormatter Formatter(Indenter, Whitespaces, Style, **I);
-        Penalty += Formatter.format(Indent, DryRun);
-      }
+      int AdditionalIndent = State.Stack.back().Indent -
+                             Previous.Children[0]->Level * Style.IndentWidth;
+      Penalty += format(Previous.Children, DryRun, AdditionalIndent);
       return true;
     }
 
+    // Cannot merge multiple statements into a single line.
     if (Previous.Children.size() > 1)
-      return false; // Cannot merge multiple statements into a single line.
+      return false; 
 
     // We can't put the closing "}" on a line with a trailing comment.
     if (Previous.Children[0]->Last->isTrailingComment())
@@ -590,25 +957,20 @@ private:
           *Previous.Children[0]->First,
           /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
           /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
-      UnwrappedLineFormatter Formatter(Indenter, Whitespaces, Style,
-                                       *Previous.Children[0]);
-      Penalty += Formatter.format(State.Column + 1, DryRun);
     }
+    Penalty += format(*Previous.Children[0], State.Column + 1, DryRun);
 
     State.Column += 1 + Previous.Children[0]->Last->TotalLength;
     return true;
   }
 
+  SourceManager &SourceMgr;
+  SmallVectorImpl<CharSourceRange> &Ranges;
   ContinuationIndenter *Indenter;
   WhitespaceManager *Whitespaces;
   FormatStyle Style;
-  const AnnotatedLine &Line;
 
   llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
-  QueueType Queue;
-  // Increasing count of \c StateNode items we have created. This is used
-  // to create a deterministic order independent of the container.
-  unsigned Count;
 };
 
 class FormatTokenLexer {
@@ -831,7 +1193,7 @@ public:
             const std::vector<CharSourceRange> &Ranges)
       : Style(Style), Lex(Lex), SourceMgr(SourceMgr),
         Whitespaces(SourceMgr, Style, inputUsesCRLF(Lex.getBuffer())),
-        Ranges(Ranges), UnwrappedLines(1),
+        Ranges(Ranges.begin(), Ranges.end()), UnwrappedLines(1),
         Encoding(encoding::detectEncoding(Lex.getBuffer())) {
     DEBUG(llvm::dbgs() << "File encoding: "
                        << (Encoding == encoding::Encoding_UTF8 ? "UTF8"
@@ -875,120 +1237,20 @@ public:
   tooling::Replacements format(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
                                bool StructuralError, FormatTokenLexer &Tokens) {
     TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in"));
-    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
-      Annotator.annotate(*AnnotatedLines[i]);
-    }
-    deriveLocalStyle(AnnotatedLines);
-    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
-      Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
-    }
-
-    Annotator.setCommentLineLevels(AnnotatedLines);
-
-    std::vector<int> IndentForLevel;
-    bool PreviousLineWasTouched = false;
-    const AnnotatedLine *PreviousLine = NULL;
-    bool FormatPPDirective = false;
-    for (SmallVectorImpl<AnnotatedLine *>::iterator I = AnnotatedLines.begin(),
-                                                    E = AnnotatedLines.end();
-         I != E; ++I) {
-      const AnnotatedLine &TheLine = **I;
-      const FormatToken *FirstTok = TheLine.First;
-      int Offset = getIndentOffset(*TheLine.First);
-
-      // Check whether this line is part of a formatted preprocessor directive.
-      if (FirstTok->HasUnescapedNewline)
-        FormatPPDirective = false;
-      if (!FormatPPDirective && TheLine.InPPDirective &&
-          (touchesLine(TheLine) || touchesPPDirective(I + 1, E)))
-        FormatPPDirective = true;
-
-      // Determine indent and try to merge multiple unwrapped lines.
-      while (IndentForLevel.size() <= TheLine.Level)
-        IndentForLevel.push_back(-1);
-      IndentForLevel.resize(TheLine.Level + 1);
-      unsigned Indent = getIndent(IndentForLevel, TheLine.Level);
-      if (static_cast<int>(Indent) + Offset >= 0)
-        Indent += Offset;
-      tryFitMultipleLinesInOne(Indent, I, E);
-
-      bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0;
-      if (TheLine.First->is(tok::eof)) {
-        if (PreviousLineWasTouched) {
-          unsigned Newlines = std::min(FirstTok->NewlinesBefore, 1u);
-          Whitespaces.replaceWhitespace(*TheLine.First, Newlines,
-                                        /*IndentLevel=*/0, /*Spaces=*/0,
-                                        /*TargetColumn=*/0);
-        }
-      } else if (TheLine.Type != LT_Invalid &&
-                 (WasMoved || FormatPPDirective || touchesLine(TheLine))) {
-        unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
-        if (FirstTok->WhitespaceRange.isValid()) {
-          formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent,
-                           TheLine.InPPDirective);
-        } else {
-          Indent = LevelIndent = FirstTok->OriginalColumn;
-        }
-        ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
-                                      BinPackInconclusiveFunctions);
-
-        // If everything fits on a single line, just put it there.
-        unsigned ColumnLimit = Style.ColumnLimit;
-        AnnotatedLine *NextLine = *(I + 1);
-        if ((I + 1) != E && NextLine->InPPDirective &&
-            !NextLine->First->HasUnescapedNewline)
-          ColumnLimit = getColumnLimit(TheLine.InPPDirective);
-
-        if (TheLine.Last->TotalLength + Indent <= ColumnLimit) {
-          LineState State =
-              Indenter.getInitialState(Indent, &TheLine, /*DryRun=*/false);
-          while (State.NextToken != NULL)
-            Indenter.addTokenToState(State, false, false);
-        } else if (Style.ColumnLimit == 0) {
-          NoColumnLimitFormatter Formatter(&Indenter);
-          Formatter.format(Indent, &TheLine);
-        } else {
-          UnwrappedLineFormatter Formatter(&Indenter, &Whitespaces, Style,
-                                           TheLine);
-          Formatter.format(Indent);
-        }
-
-        IndentForLevel[TheLine.Level] = LevelIndent;
-        PreviousLineWasTouched = true;
-      } else {
-        // Format the first token if necessary, and notify the WhitespaceManager
-        // about the unchanged whitespace.
-        for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
-          if (Tok == TheLine.First &&
-              (Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
-            unsigned LevelIndent = Tok->OriginalColumn;
-            // Remove trailing whitespace of the previous line if it was
-            // touched.
-            if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) {
-              formatFirstToken(*Tok, PreviousLine, TheLine.Level, LevelIndent,
-                               TheLine.InPPDirective);
-            } else {
-              Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective);
-            }
-
-            if (static_cast<int>(LevelIndent) - Offset >= 0)
-              LevelIndent -= Offset;
-            if (Tok->isNot(tok::comment))
-              IndentForLevel[TheLine.Level] = LevelIndent;
-          } else {
-            Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective);
-          }
-        }
-        // If we did not reformat this unwrapped line, the column at the end of
-        // the last token is unchanged - thus, we can calculate the end of the
-        // last token.
-        PreviousLineWasTouched = false;
-      }
-      for (FormatToken *Tok = TheLine.First; Tok != NULL; Tok = Tok->Next) {
-        Tok->Finalized = true;
-      }
-      PreviousLine = *I;
+    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+      Annotator.annotate(*AnnotatedLines[i]);
+    }
+    deriveLocalStyle(AnnotatedLines);
+    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+      Annotator.calculateFormattingInformation(*AnnotatedLines[i]);
     }
+
+    Annotator.setCommentLineLevels(AnnotatedLines);
+    ContinuationIndenter Indenter(Style, SourceMgr, Whitespaces, Encoding,
+                                  BinPackInconclusiveFunctions);
+    UnwrappedLineFormatter Formatter(SourceMgr, Ranges, &Indenter, &Whitespaces,
+                                     Style);
+    Formatter.format(AnnotatedLines, /*DryRun=*/false);
     return Whitespaces.generateReplacements();
   }
 
@@ -1051,228 +1313,6 @@ private:
         HasBinPackedFunction || !HasOnePerLineFunction;
   }
 
-  /// \brief Get the indent of \p Level from \p IndentForLevel.
-  ///
-  /// \p IndentForLevel must contain the indent for the level \c l
-  /// at \p IndentForLevel[l], or a value < 0 if the indent for
-  /// that level is unknown.
-  unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
-    if (IndentForLevel[Level] != -1)
-      return IndentForLevel[Level];
-    if (Level == 0)
-      return 0;
-    return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
-  }
-
-  /// \brief Get the offset of the line relatively to the level.
-  ///
-  /// For example, 'public:' labels in classes are offset by 1 or 2
-  /// characters to the left from their level.
-  int getIndentOffset(const FormatToken &RootToken) {
-    if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
-      return Style.AccessModifierOffset;
-    return 0;
-  }
-
-  /// \brief Tries to merge lines into one.
-  ///
-  /// This will change \c Line and \c AnnotatedLine to contain the merged line,
-  /// if possible; note that \c I will be incremented when lines are merged.
-  void tryFitMultipleLinesInOne(unsigned Indent,
-                                SmallVectorImpl<AnnotatedLine *>::iterator &I,
-                                SmallVectorImpl<AnnotatedLine *>::iterator E) {
-    // We can never merge stuff if there are trailing line comments.
-    AnnotatedLine *TheLine = *I;
-    if (TheLine->Last->Type == TT_LineComment)
-      return;
-
-    if (Indent > Style.ColumnLimit)
-      return;
-
-    unsigned Limit = Style.ColumnLimit - Indent;
-    // If we already exceed the column limit, we set 'Limit' to 0. The different
-    // tryMerge..() functions can then decide whether to still do merging.
-    Limit = TheLine->Last->TotalLength > Limit
-                ? 0
-                : Limit - TheLine->Last->TotalLength;
-
-    if (I + 1 == E || (*(I + 1))->Type == LT_Invalid)
-      return;
-
-    if (TheLine->Last->is(tok::l_brace)) {
-      tryMergeSimpleBlock(I, E, Limit);
-    } else if (Style.AllowShortIfStatementsOnASingleLine &&
-               TheLine->First->is(tok::kw_if)) {
-      tryMergeSimpleControlStatement(I, E, Limit);
-    } else if (Style.AllowShortLoopsOnASingleLine &&
-               TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
-      tryMergeSimpleControlStatement(I, E, Limit);
-    } else if (TheLine->InPPDirective && (TheLine->First->HasUnescapedNewline ||
-                                          TheLine->First->IsFirst)) {
-      tryMergeSimplePPDirective(I, E, Limit);
-    }
-  }
-
-  void tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::iterator &I,
-                                 SmallVectorImpl<AnnotatedLine *>::iterator E,
-                                 unsigned Limit) {
-    if (Limit == 0)
-      return;
-    AnnotatedLine &Line = **I;
-    if (!(*(I + 1))->InPPDirective || (*(I + 1))->First->HasUnescapedNewline)
-      return;
-    if (I + 2 != E && (*(I + 2))->InPPDirective &&
-        !(*(I + 2))->First->HasUnescapedNewline)
-      return;
-    if (1 + (*(I + 1))->Last->TotalLength > Limit)
-      return;
-    join(Line, **(++I));
-  }
-
-  void
-  tryMergeSimpleControlStatement(SmallVectorImpl<AnnotatedLine *>::iterator &I,
-                                 SmallVectorImpl<AnnotatedLine *>::iterator E,
-                                 unsigned Limit) {
-    if (Limit == 0)
-      return;
-    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman &&
-        (*(I + 1))->First->is(tok::l_brace))
-      return;
-    if ((*(I + 1))->InPPDirective != (*I)->InPPDirective ||
-        ((*(I + 1))->InPPDirective && (*(I + 1))->First->HasUnescapedNewline))
-      return;
-    AnnotatedLine &Line = **I;
-    if (Line.Last->isNot(tok::r_paren))
-      return;
-    if (1 + (*(I + 1))->Last->TotalLength > Limit)
-      return;
-    if ((*(I + 1))->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for,
-                                   tok::kw_while) ||
-        (*(I + 1))->First->Type == TT_LineComment)
-      return;
-    // Only inline simple if's (no nested if or else).
-    if (I + 2 != E && Line.First->is(tok::kw_if) &&
-        (*(I + 2))->First->is(tok::kw_else))
-      return;
-    join(Line, **(++I));
-  }
-
-  void tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::iterator &I,
-                           SmallVectorImpl<AnnotatedLine *>::iterator E,
-                           unsigned Limit) {
-    // No merging if the brace already is on the next line.
-    if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
-      return;
-
-    // First, check that the current line allows merging. This is the case if
-    // we're not in a control flow statement and the last token is an opening
-    // brace.
-    AnnotatedLine &Line = **I;
-    if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
-                            tok::kw_else, tok::kw_try, tok::kw_catch,
-                            tok::kw_for,
-                            // This gets rid of all ObjC @ keywords and methods.
-                            tok::at, tok::minus, tok::plus))
-      return;
-
-    FormatToken *Tok = (*(I + 1))->First;
-    if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
-        (Tok->getNextNonComment() == NULL ||
-         Tok->getNextNonComment()->is(tok::semi))) {
-      // We merge empty blocks even if the line exceeds the column limit.
-      Tok->SpacesRequiredBefore = 0;
-      Tok->CanBreakBefore = true;
-      join(Line, **(I + 1));
-      I += 1;
-    } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) {
-      // Check that we still have three lines and they fit into the limit.
-      if (I + 2 == E || (*(I + 2))->Type == LT_Invalid ||
-          !nextTwoLinesFitInto(I, Limit))
-        return;
-
-      // Second, check that the next line does not contain any braces - if it
-      // does, readability declines when putting it into a single line.
-      if ((*(I + 1))->Last->Type == TT_LineComment || Tok->MustBreakBefore)
-        return;
-      do {
-        if (Tok->isOneOf(tok::l_brace, tok::r_brace))
-          return;
-        Tok = Tok->Next;
-      } while (Tok != NULL);
-
-      // Last, check that the third line contains a single closing brace.
-      Tok = (*(I + 2))->First;
-      if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) ||
-          Tok->MustBreakBefore)
-        return;
-
-      join(Line, **(I + 1));
-      join(Line, **(I + 2));
-      I += 2;
-    }
-  }
-
-  bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::iterator I,
-                           unsigned Limit) {
-    return 1 + (*(I + 1))->Last->TotalLength + 1 +
-               (*(I + 2))->Last->TotalLength <=
-           Limit;
-  }
-
-  void join(AnnotatedLine &A, const AnnotatedLine &B) {
-    assert(!A.Last->Next);
-    assert(!B.First->Previous);
-    A.Last->Next = B.First;
-    B.First->Previous = A.Last;
-    unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
-    for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
-      Tok->TotalLength += LengthA;
-      A.Last = Tok;
-    }
-  }
-
-  bool touchesRanges(const CharSourceRange &Range) {
-    for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
-      if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),
-                                               Ranges[i].getBegin()) &&
-          !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
-                                               Range.getBegin()))
-        return true;
-    }
-    return false;
-  }
-
-  bool touchesLine(const AnnotatedLine &TheLine) {
-    const FormatToken *First = TheLine.First;
-    const FormatToken *Last = TheLine.Last;
-    CharSourceRange LineRange = CharSourceRange::getCharRange(
-        First->WhitespaceRange.getBegin().getLocWithOffset(
-            First->LastNewlineOffset),
-        Last->getStartOfNonWhitespace().getLocWithOffset(
-            Last->TokenText.size() - 1));
-    return touchesRanges(LineRange);
-  }
-
-  bool touchesPPDirective(SmallVectorImpl<AnnotatedLine *>::iterator I,
-                          SmallVectorImpl<AnnotatedLine *>::iterator E) {
-    for (; I != E; ++I) {
-      if ((*I)->First->HasUnescapedNewline)
-        return false;
-      if (touchesLine(**I))
-        return true;
-    }
-    return false;
-  }
-
-  bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
-    const FormatToken *First = TheLine.First;
-    CharSourceRange LineRange = CharSourceRange::getCharRange(
-        First->WhitespaceRange.getBegin(),
-        First->WhitespaceRange.getBegin().getLocWithOffset(
-            First->LastNewlineOffset));
-    return touchesRanges(LineRange);
-  }
-
   virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
     assert(!UnwrappedLines.empty());
     UnwrappedLines.back().push_back(TheLine);
@@ -1282,45 +1322,11 @@ private:
     UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
   }
 
-  /// \brief Add a new line and the required indent before the first Token
-  /// of the \c UnwrappedLine if there was no structural parsing error.
-  void formatFirstToken(FormatToken &RootToken,
-                        const AnnotatedLine *PreviousLine, unsigned IndentLevel,
-                        unsigned Indent, bool InPPDirective) {
-    unsigned Newlines =
-        std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
-    // Remove empty lines before "}" where applicable.
-    if (RootToken.is(tok::r_brace) &&
-        (!RootToken.Next ||
-         (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
-      Newlines = std::min(Newlines, 1u);
-    if (Newlines == 0 && !RootToken.IsFirst)
-      Newlines = 1;
-
-    // Insert extra new line before access specifiers.
-    if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
-        RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
-      ++Newlines;
-
-    // Remove empty lines after access specifiers.
-    if (PreviousLine && PreviousLine->First->isAccessSpecifier())
-      Newlines = std::min(1u, Newlines);
-
-    Whitespaces.replaceWhitespace(
-        RootToken, Newlines, IndentLevel, Indent, Indent,
-        InPPDirective && !RootToken.HasUnescapedNewline);
-  }
-
-  unsigned getColumnLimit(bool InPPDirective) const {
-    // In preprocessor directives reserve two chars for trailing " \"
-    return Style.ColumnLimit - (InPPDirective ? 2 : 0);
-  }
-
   FormatStyle Style;
   Lexer &Lex;
   SourceManager &SourceMgr;
   WhitespaceManager Whitespaces;
-  std::vector<CharSourceRange> Ranges;
+  SmallVector<CharSourceRange, 8> Ranges;
   SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines;
 
   encoding::Encoding Encoding;

Modified: cfe/trunk/lib/Format/TokenAnnotator.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=194090&r1=194089&r2=194090&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.cpp (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.cpp Tue Nov  5 13:10:03 2013
@@ -1013,22 +1013,21 @@ private:
 
 void
 TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) {
-  if (Lines.empty())
-    return;
-
   const AnnotatedLine *NextNonCommentLine = NULL;
-  for (unsigned i = Lines.size() - 1; i > 0; --i) {
-    if (NextNonCommentLine && Lines[i]->First->is(tok::comment) &&
-        !Lines[i]->First->Next)
-      Lines[i]->Level = NextNonCommentLine->Level;
+  for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
+                                                          E = Lines.rend();
+       I != E; ++I) {
+    if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
+        (*I)->First->Next == NULL)
+      (*I)->Level = NextNonCommentLine->Level;
     else
-      NextNonCommentLine =
-          Lines[i]->First->isNot(tok::r_brace) ? Lines[i] : NULL;
+      NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : NULL;
+
+    setCommentLineLevels((*I)->Children);
   }
 }
 
 void TokenAnnotator::annotate(AnnotatedLine &Line) {
-  setCommentLineLevels(Line.Children);
   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
                                                   E = Line.Children.end();
        I != E; ++I) {

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=194090&r1=194089&r2=194090&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Tue Nov  5 13:10:03 2013
@@ -2447,6 +2447,45 @@ TEST_F(FormatTest, LayoutNestedBlocks) {
                    "  // comment\n"
                    "  int  j;\n"
                    "});"));
+
+  verifyFormat("DEBUG({\n"
+               "  if (a)\n"
+               "    return;\n"
+               "});");
+  verifyGoogleFormat("DEBUG({\n"
+                     "  if (a) return;\n"
+                     "});");
+  FormatStyle Style = getGoogleStyle();
+  Style.ColumnLimit = 45;
+  verifyFormat("Debug(aaaaa, {\n"
+               "               if (aaaaaaaaaaaaaaaaaaaaaaaa)\n"
+               "                 return;\n"
+               "             },\n"
+               "      a);", Style);
+
+  EXPECT_EQ("Debug({\n"
+            "        if (aaaaaaaaaaaaaaaaaaaaaaaa)\n"
+            "          return;\n"
+            "      },\n"
+            "      a);",
+            format("Debug({\n"
+                   "        if (aaaaaaaaaaaaaaaaaaaaaaaa)\n"
+                   "             return;\n"
+                   "      },\n"
+                   "      a);",
+                   50, 1, getLLVMStyle()));
+}
+
+TEST_F(FormatTest, IndividualStatementsOfNestedBlocks) {
+  EXPECT_EQ("DEBUG({\n"
+            "  int        i;\n"
+            "  int j;\n"
+            "});",
+            format("DEBUG(   {\n"
+                   "  int        i;\n"
+                   "  int        j;\n"
+                   "}   )  ;",
+                   40, 1, getLLVMStyle()));
 }
 
 TEST_F(FormatTest, PutEmptyBlocksIntoOneLine) {





More information about the cfe-commits mailing list