[cfe-commits] r172403 - /cfe/trunk/lib/Format/Format.cpp

Mon Jan 14 05:08:07 PST 2013

Author: djasper
Date: Mon Jan 14 07:08:07 2013
New Revision: 172403

URL: http://llvm.org/viewvc/llvm-project?rev=172403&view=rev
Log:
Refactor datastructure used in clang-format.

Main difference, add an AnnotatedLine class to hold information about a
line while formatting. At the same time degrade the UnwrappedLine class
to a class solely used for communicating between the UnwrappedLineParser
and the Formatter.

No functional changes intended.

Modified:
    cfe/trunk/lib/Format/Format.cpp

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=172403&r1=172402&r2=172403&view=diff
==============================================================================

--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Mon Jan 14 07:08:07 2013
@@ -93,6 +93,19 @@
   AnnotatedToken *Parent;
 };
 
+class AnnotatedLine {
+public:
+  AnnotatedLine(const FormatToken &FormatTok, unsigned Level,
+                bool InPPDirective)
+      : First(FormatTok), Level(Level), InPPDirective(InPPDirective) {}
+  AnnotatedToken First;
+  AnnotatedToken *Last;
+
+  LineType Type;
+  unsigned Level;
+  bool InPPDirective;
+};
+
 static prec::Level getPrecedence(const AnnotatedToken &Tok) {
   return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
 }
@@ -168,11 +181,13 @@
                                        NewLineText + std::string(Spaces, ' ')));
 }
 
-/// \brief Checks whether the (remaining) \c UnwrappedLine starting with
-/// \p RootToken fits into \p Limit columns.
-static bool fitsIntoLimit(const AnnotatedToken &RootToken, unsigned Limit) {
+/// \brief Calculates whether the (remaining) \c AnnotatedLine starting with
+/// \p RootToken fits into \p Limit columns on a single line.
+///
+/// If true, sets \p Length to the required length.
+static bool fitsIntoLimit(const AnnotatedToken &RootToken,
+                          unsigned Limit, unsigned* Length = 0) {
   unsigned Columns = RootToken.FormatTok.TokenLength;
-  bool FitsOnALine = true;
   const AnnotatedToken *Tok = &RootToken;
   while (!Tok->Children.empty()) {
     Tok = &Tok->Children[0];
@@ -181,11 +196,13 @@
     // needs to be put on a new line if the line needs to be split.
     if (Columns > Limit ||
         (Tok->MustBreakBefore && Tok->Type != TT_CtorInitializerColon)) {
-      FitsOnALine = false;
-      break;
+      // FIXME: Remove this hack.
+      return false;
     }
   }
-  return FitsOnALine;
+  if (Length != 0)
+    *Length = Columns;
+  return true;
 }
 
 /// \brief Returns if a token is an Objective-C selector name.
@@ -200,7 +217,7 @@
 class UnwrappedLineFormatter {
 public:
   UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
-                         const UnwrappedLine &Line, unsigned FirstIndent,
+                         const AnnotatedLine &Line, unsigned FirstIndent,
                          bool FitsOnALine, const AnnotatedToken &RootToken,
                          tooling::Replacements &Replaces, bool StructuralError)
       : Style(Style), SourceMgr(SourceMgr), Line(Line),
@@ -599,7 +616,7 @@
 
   FormatStyle Style;
   SourceManager &SourceMgr;
-  const UnwrappedLine &Line;
+  const AnnotatedLine &Line;
   const unsigned FirstIndent;
   const bool FitsOnALine;
   const AnnotatedToken &RootToken;
@@ -616,10 +633,9 @@
 /// \c UnwrappedLine.
 class TokenAnnotator {
 public:
-  TokenAnnotator(const UnwrappedLine &Line, const FormatStyle &Style,
-                 SourceManager &SourceMgr, Lexer &Lex)
-      : Style(Style), SourceMgr(SourceMgr), Lex(Lex),
-        RootToken(Line.RootToken) {}
+  TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
+                 AnnotatedLine &Line)
+      : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Line(Line) {}
 
   /// \brief A parser that gathers additional information about tokens.
   ///
@@ -888,7 +904,9 @@
   };
 
   void createAnnotatedTokens(AnnotatedToken &Current) {
-    if (!Current.FormatTok.Children.empty()) {
+    if (Current.FormatTok.Children.empty()) {
+      Line.Last = &Current;
+    } else {
       Current.Children.push_back(AnnotatedToken(Current.FormatTok.Children[0]));
       Current.Children.back().Parent = &Current;
       createAnnotatedTokens(Current.Children.back());
@@ -917,34 +935,30 @@
       calculateExtraInformation(Current.Children[0]);
   }
 
-  bool annotate() {
-    createAnnotatedTokens(RootToken);
-
-    AnnotatingParser Parser(RootToken);
-    CurrentLineType = Parser.parseLine();
-    if (CurrentLineType == LT_Invalid)
-      return false;
-
-    determineTokenTypes(RootToken, /*IsRHS=*/false);
-
-    if (RootToken.Type == TT_ObjCMethodSpecifier)
-      CurrentLineType = LT_ObjCMethodDecl;
-    else if (RootToken.Type == TT_ObjCDecl)
-      CurrentLineType = LT_ObjCDecl;
-    else if (RootToken.Type == TT_ObjCProperty)
-      CurrentLineType = LT_ObjCProperty;
-
-    if (!RootToken.Children.empty())
-      calculateExtraInformation(RootToken.Children[0]);
-    return true;
-  }
-
-  LineType getLineType() {
-    return CurrentLineType;
-  }
+  void annotate() {
+    Line.Last = &Line.First;
+    createAnnotatedTokens(Line.First);
+
+    AnnotatingParser Parser(Line.First);
+    Line.Type = Parser.parseLine();
+    if (Line.Type == LT_Invalid)
+      return;
+
+    determineTokenTypes(Line.First, /*IsRHS=*/false);
+
+    if (Line.First.Type == TT_ObjCMethodSpecifier)
+      Line.Type = LT_ObjCMethodDecl;
+    else if (Line.First.Type == TT_ObjCDecl)
+      Line.Type = LT_ObjCDecl;
+    else if (Line.First.Type == TT_ObjCProperty)
+      Line.Type = LT_ObjCProperty;
+
+    Line.First.SpaceRequiredBefore = true;
+    Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
+    Line.First.CanBreakBefore = Line.First.MustBreakBefore;
 
-  const AnnotatedToken &getRootToken() {
-    return RootToken;
+    if (!Line.First.Children.empty())
+      calculateExtraInformation(Line.First.Children[0]);
   }
 
 private:
@@ -1079,7 +1093,7 @@
       return false;
     if (Right.is(tok::less) &&
         (Left.is(tok::kw_template) ||
-         (CurrentLineType == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
+         (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
       return true;
     if (Left.is(tok::arrow) || Right.is(tok::arrow))
       return false;
@@ -1119,7 +1133,7 @@
     if (Left.is(tok::l_paren))
       return false;
     if (Right.is(tok::l_paren)) {
-      return CurrentLineType == LT_ObjCDecl || Left.is(tok::kw_if) ||
+      return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
              Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
              Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
              Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
@@ -1134,7 +1148,7 @@
   }
 
   bool spaceRequiredBefore(const AnnotatedToken &Tok) {
-    if (CurrentLineType == LT_ObjCMethodDecl) {
+    if (Line.Type == LT_ObjCMethodDecl) {
       if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
           Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
         return true;
@@ -1151,7 +1165,7 @@
         // Don't space between ':' and '('
         return false;
     }
-    if (CurrentLineType == LT_ObjCProperty &&
+    if (Line.Type == LT_ObjCProperty &&
         (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
       return false;
 
@@ -1167,7 +1181,7 @@
     if (Tok.Parent->Type == TT_OverloadedOperator)
       return false;
     if (Tok.is(tok::colon))
-      return RootToken.isNot(tok::kw_case) && !Tok.Children.empty() &&
+      return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
              Tok.Type != TT_ObjCMethodExpr;
     if (Tok.Parent->Type == TT_UnaryOperator ||
         Tok.Parent->Type == TT_CastRParen)
@@ -1185,7 +1199,7 @@
       return true;
     if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
       return false;
-    if (Tok.is(tok::less) && RootToken.is(tok::hash))
+    if (Tok.is(tok::less) && Line.First.is(tok::hash))
       return true;
     if (Tok.Type == TT_TrailingUnaryOperator)
       return false;
@@ -1194,7 +1208,7 @@
 
   bool canBreakBefore(const AnnotatedToken &Right) {
     const AnnotatedToken &Left = *Right.Parent;
-    if (CurrentLineType == LT_ObjCMethodDecl) {
+    if (Line.Type == LT_ObjCMethodDecl) {
       if (Right.is(tok::identifier) && !Right.Children.empty() &&
           Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
         return true;
@@ -1221,7 +1235,7 @@
     if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
         Left.Type == TT_UnaryOperator || Right.Type == TT_ConditionalExpr)
       return false;
-    if (Left.is(tok::equal) && CurrentLineType == LT_VirtualFunctionDecl)
+    if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
       return false;
 
     if (Right.is(tok::comment))
@@ -1246,8 +1260,7 @@
   FormatStyle Style;
   SourceManager &SourceMgr;
   Lexer &Lex;
-  LineType CurrentLineType;
-  AnnotatedToken RootToken;
+  AnnotatedLine &Line;
 };
 
 class LexerBasedFormatTokenSource : public FormatTokenSource {
@@ -1347,35 +1360,32 @@
     UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
     StructuralError = Parser.parse();
     unsigned PreviousEndOfLineColumn = 0;
-    for (std::vector<UnwrappedLine>::iterator I = UnwrappedLines.begin(),
-                                              E = UnwrappedLines.end();
+    for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+      TokenAnnotator Annotator(Style, SourceMgr, Lex, AnnotatedLines[i]);
+      Annotator.annotate();
+    }
+    for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
+                                              E = AnnotatedLines.end();
          I != E; ++I) {
-      const UnwrappedLine &TheLine = *I;
-      if (touchesRanges(TheLine)) {
-        OwningPtr<TokenAnnotator> AnnotatedLine(
-            new TokenAnnotator(TheLine, Style, SourceMgr, Lex));
-        if (!AnnotatedLine->annotate())
-          break;
-        unsigned Indent = formatFirstToken(AnnotatedLine->getRootToken(),
-                                           TheLine.Level, TheLine.InPPDirective,
+      const AnnotatedLine &TheLine = *I;
+      if (touchesRanges(TheLine) && TheLine.Type != LT_Invalid) {
+        unsigned Indent = formatFirstToken(TheLine.First, TheLine.Level,
+                                           TheLine.InPPDirective,
                                            PreviousEndOfLineColumn);
-
-        UnwrappedLine Line(TheLine);
-        bool FitsOnALine = tryFitMultipleLinesInOne(Indent, Line, AnnotatedLine,
-                                                    I, E);
-        UnwrappedLineFormatter Formatter(
-            Style, SourceMgr, Line, Indent, FitsOnALine,
-            AnnotatedLine->getRootToken(), Replaces, StructuralError);
+        bool FitsOnALine = tryFitMultipleLinesInOne(Indent, I, E);
+        UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
+                                         FitsOnALine, TheLine.First, Replaces,
+                                         StructuralError);
         PreviousEndOfLineColumn = Formatter.format();
       } else {
         // If we did not reformat this unwrapped line, the column at the end of
         // the last token is unchanged - thus, we can calculate the end of the
         // last token, and return the result.
-        const FormatToken *Last = getLastInLine(TheLine);
         PreviousEndOfLineColumn =
-            SourceMgr.getSpellingColumnNumber(Last->Tok.getLocation()) +
-            Lex.MeasureTokenLength(Last->Tok.getLocation(), SourceMgr,
-                                   Lex.getLangOpts()) -
+            SourceMgr.getSpellingColumnNumber(
+                TheLine.Last->FormatTok.Tok.getLocation()) +
+            Lex.MeasureTokenLength(TheLine.Last->FormatTok.Tok.getLocation(),
+                                   SourceMgr, Lex.getLangOpts()) -
             1;
       }
     }
@@ -1389,81 +1399,85 @@
   /// if possible; note that \c I will be incremented when lines are merged.
   ///
   /// Returns whether the resulting \c Line can fit in a single line.
-  bool tryFitMultipleLinesInOne(unsigned Indent, UnwrappedLine &Line,
-                                OwningPtr<TokenAnnotator> &AnnotatedLine,
-                                std::vector<UnwrappedLine>::iterator &I,
-                                std::vector<UnwrappedLine>::iterator E) {
+  bool tryFitMultipleLinesInOne(unsigned Indent,
+                                std::vector<AnnotatedLine>::iterator &I,
+                                std::vector<AnnotatedLine>::iterator E) {
     unsigned Limit = Style.ColumnLimit - (I->InPPDirective ? 1 : 0) - Indent;
 
     // Check whether the UnwrappedLine can be put onto a single line. If
     // so, this is bound to be the optimal solution (by definition) and we
     // don't need to analyze the entire solution space.
-    bool FitsOnALine = fitsIntoLimit(AnnotatedLine->getRootToken(), Limit);
-    if (!FitsOnALine || I + 1 == E || I + 2 == E)
-      return FitsOnALine;
+    unsigned LengthLine1 = 0;
+    if (!fitsIntoLimit(I->First, Limit, &LengthLine1))
+      return false;
 
-    // Try to merge the next two lines if possible.
-    UnwrappedLine Combined(Line);
+    // Check that we still have three lines and they fit into the limit.
+    if (I + 1 == E || I + 2 == E)
+      return true;
+    unsigned LengthLine2 = 0;
+    unsigned LengthLine3 = 0;
+    if (!fitsIntoLimit((I + 1)->First, Limit, &LengthLine2) ||
+        !fitsIntoLimit((I + 2)->First, Limit, &LengthLine3))
+      return true;
+    if (LengthLine1 + LengthLine2 + LengthLine3 + 2 > Limit) // Two spaces.
+      return true;
 
     // First, check that the current line allows merging. This is the case if
     // we're not in a control flow statement and the last token is an opening
     // brace.
-    FormatToken *Last = &Combined.RootToken;
+    AnnotatedLine& Line = *I;
     bool AllowedTokens =
-        Last->Tok.isNot(tok::kw_if) && Last->Tok.isNot(tok::kw_while) &&
-        Last->Tok.isNot(tok::kw_do) && Last->Tok.isNot(tok::r_brace) &&
-        Last->Tok.isNot(tok::kw_else) && Last->Tok.isNot(tok::kw_try) &&
-        Last->Tok.isNot(tok::kw_catch) && Last->Tok.isNot(tok::kw_for) &&
+        Line.First.isNot(tok::kw_if) && Line.First.isNot(tok::kw_while) &&
+        Line.First.isNot(tok::kw_do) && Line.First.isNot(tok::r_brace) &&
+        Line.First.isNot(tok::kw_else) && Line.First.isNot(tok::kw_try) &&
+        Line.First.isNot(tok::kw_catch) && Line.First.isNot(tok::kw_for) &&
         // This gets rid of all ObjC @ keywords and methods.
-        Last->Tok.isNot(tok::at) && Last->Tok.isNot(tok::minus) &&
-        Last->Tok.isNot(tok::plus);
-    while (!Last->Children.empty())
-      Last = &Last->Children.back();
-    if (!Last->Tok.is(tok::l_brace))
-      return FitsOnALine;
+        Line.First.isNot(tok::at) && Line.First.isNot(tok::minus) &&
+        Line.First.isNot(tok::plus);
+    if (Line.Last->isNot(tok::l_brace) || !AllowedTokens)
+      return true;
 
     // Second, check that the next line does not contain any braces - if it
     // does, readability declines when putting it into a single line.
-    const FormatToken *Next = &(I + 1)->RootToken;
-    while (Next) {
-      AllowedTokens = AllowedTokens && !Next->Tok.is(tok::l_brace) &&
-                      !Next->Tok.is(tok::r_brace);
-      Last->Children.push_back(*Next);
-      Last = &Last->Children[0];
-      Last->Children.clear();
-      Next = Next->Children.empty() ? NULL : &Next->Children.back();
-    }
+    const AnnotatedToken *Tok = &(I + 1)->First;
+    if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
+      return true;
+    do {
+      if (Tok->is(tok::l_brace) || Tok->is(tok::r_brace))
+        return true;
+      Tok = Tok->Children.empty() ? NULL : &Tok->Children.back();
+    } while (Tok != NULL);
 
     // Last, check that the third line contains a single closing brace.
-    Next = &(I + 2)->RootToken;
-    AllowedTokens = AllowedTokens && Next->Tok.is(tok::r_brace);
-    if (!Next->Children.empty() || !AllowedTokens)
-      return FitsOnALine;
-    Last->Children.push_back(*Next);
-
-    OwningPtr<TokenAnnotator> CombinedAnnotator(
-        new TokenAnnotator(Combined, Style, SourceMgr, Lex));
-    if (CombinedAnnotator->annotate() &&
-        fitsIntoLimit(CombinedAnnotator->getRootToken(), Limit)) {
-      // If the merged line fits, we use that instead and skip the next two
-      // lines.
-      AnnotatedLine.reset(CombinedAnnotator.take());
-      Line = Combined;
-      I += 2;
-    }
-    return FitsOnALine;
-  }
-
-  const FormatToken *getLastInLine(const UnwrappedLine &TheLine) {
-    const FormatToken *Last = &TheLine.RootToken;
-    while (!Last->Children.empty())
-      Last = &Last->Children.back();
-    return Last;
-  }
-
-  bool touchesRanges(const UnwrappedLine &TheLine) {
-    const FormatToken *First = &TheLine.RootToken;
-    const FormatToken *Last = getLastInLine(TheLine);
+    Tok = &(I + 2)->First;
+    if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) ||
+        Tok->MustBreakBefore)
+      return true;
+
+    // If the merged line fits, we use that instead and skip the next two lines.
+    Line.Last->Children.push_back((I + 1)->First);
+    while (!Line.Last->Children.empty()) {
+      Line.Last->Children[0].Parent = Line.Last;
+      Line.Last = &Line.Last->Children[0];
+    }
+
+    join(Line, *(I + 1));
+    join(Line, *(I + 2));
+    I += 2;
+    return true;
+  }
+
+  void join(AnnotatedLine &A, const AnnotatedLine &B) {
+    A.Last->Children.push_back(B.First);
+    while (!A.Last->Children.empty()) {
+      A.Last->Children[0].Parent = A.Last;
+      A.Last = &A.Last->Children[0];
+    }
+  }
+
+  bool touchesRanges(const AnnotatedLine &TheLine) {
+    const FormatToken *First = &TheLine.First.FormatTok;
+    const FormatToken *Last = &TheLine.Last->FormatTok;
     CharSourceRange LineRange = CharSourceRange::getTokenRange(
                                     First->Tok.getLocation(),
                                     Last->Tok.getLocation());
@@ -1478,7 +1492,8 @@
   }
 
   virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
-    UnwrappedLines.push_back(TheLine);
+    AnnotatedLines.push_back(
+        AnnotatedLine(TheLine.RootToken, TheLine.Level, TheLine.InPPDirective));
   }
 
   /// \brief Add a new line and the required indent before the first Token
@@ -1526,7 +1541,7 @@
   SourceManager &SourceMgr;
   tooling::Replacements Replaces;
   std::vector<CharSourceRange> Ranges;
-  std::vector<UnwrappedLine> UnwrappedLines;
+  std::vector<AnnotatedLine> AnnotatedLines;
   bool StructuralError;
 };