r173830 - Move the token annotator into separate files.

Daniel Jasper djasper at google.com
Tue Jan 29 13:01:14 PST 2013


Author: djasper
Date: Tue Jan 29 15:01:14 2013
New Revision: 173830

URL: http://llvm.org/viewvc/llvm-project?rev=173830&view=rev
Log:
Move the token annotator into separate files.

No functional changes. Also removed experimental-warning from all of
clang-format's files, as it is no longer accurate.

Added:
    cfe/trunk/lib/Format/TokenAnnotator.cpp
    cfe/trunk/lib/Format/TokenAnnotator.h
Modified:
    cfe/trunk/include/clang/Format/Format.h
    cfe/trunk/lib/Format/CMakeLists.txt
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.h

Modified: cfe/trunk/include/clang/Format/Format.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Format/Format.h?rev=173830&r1=173829&r2=173830&view=diff
==============================================================================
--- cfe/trunk/include/clang/Format/Format.h (original)
+++ cfe/trunk/include/clang/Format/Format.h Tue Jan 29 15:01:14 2013
@@ -10,9 +10,6 @@
 /// \file
 /// Various functions to configurably format source code.
 ///
-/// This is EXPERIMENTAL code under heavy development. It is not in a state yet,
-/// where it can be used to format real code.
-///
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_FORMAT_FORMAT_H

Modified: cfe/trunk/lib/Format/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/CMakeLists.txt?rev=173830&r1=173829&r2=173830&view=diff
==============================================================================
--- cfe/trunk/lib/Format/CMakeLists.txt (original)
+++ cfe/trunk/lib/Format/CMakeLists.txt Tue Jan 29 15:01:14 2013
@@ -1,6 +1,7 @@
 set(LLVM_LINK_COMPONENTS support)
 
 add_clang_library(clangFormat
+  TokenAnnotator.cpp
   UnwrappedLineParser.cpp
   Format.cpp
   )

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=173830&r1=173829&r2=173830&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Tue Jan 29 15:01:14 2013
@@ -11,13 +11,11 @@
 /// \brief This file implements functions declared in Format.h. This will be
 /// split into separate files as we go.
 ///
-/// This is EXPERIMENTAL code under heavy development. It is not in a state yet,
-/// where it can be used to format real code.
-///
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "format-formatter"
 
+#include "TokenAnnotator.h"
 #include "UnwrappedLineParser.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/OperatorPrecedence.h"
@@ -34,140 +32,6 @@
 namespace clang {
 namespace format {
 
-enum TokenType {
-  TT_BinaryOperator,
-  TT_BlockComment,
-  TT_CastRParen,
-  TT_ConditionalExpr,
-  TT_CtorInitializerColon,
-  TT_ImplicitStringLiteral,
-  TT_LineComment,
-  TT_ObjCBlockLParen,
-  TT_ObjCDecl,
-  TT_ObjCMethodSpecifier,
-  TT_ObjCMethodExpr,
-  TT_ObjCProperty,
-  TT_OverloadedOperator,
-  TT_PointerOrReference,
-  TT_PureVirtualSpecifier,
-  TT_RangeBasedForLoopColon,
-  TT_StartOfName,
-  TT_TemplateCloser,
-  TT_TemplateOpener,
-  TT_TrailingUnaryOperator,
-  TT_UnaryOperator,
-  TT_Unknown
-};
-
-enum LineType {
-  LT_Invalid,
-  LT_Other,
-  LT_BuilderTypeCall,
-  LT_PreprocessorDirective,
-  LT_VirtualFunctionDecl,
-  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
-  LT_ObjCMethodDecl,
-  LT_ObjCProperty // An @property line.
-};
-
-class AnnotatedToken {
-public:
-  explicit AnnotatedToken(const FormatToken &FormatTok)
-      : FormatTok(FormatTok), Type(TT_Unknown), SpaceRequiredBefore(false),
-        CanBreakBefore(false), MustBreakBefore(false),
-        ClosesTemplateDeclaration(false), MatchingParen(NULL),
-        ParameterCount(1), Parent(NULL) {
-  }
-
-  bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
-  bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
-
-  bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
-    return FormatTok.Tok.isObjCAtKeyword(Kind);
-  }
-
-  FormatToken FormatTok;
-
-  TokenType Type;
-
-  bool SpaceRequiredBefore;
-  bool CanBreakBefore;
-  bool MustBreakBefore;
-
-  bool ClosesTemplateDeclaration;
-
-  AnnotatedToken *MatchingParen;
-
-  /// \brief Number of parameters, if this is "(", "[" or "<".
-  ///
-  /// This is initialized to 1 as we don't need to distinguish functions with
-  /// 0 parameters from functions with 1 parameter. Thus, we can simply count
-  /// the number of commas.
-  unsigned ParameterCount;
-
-  /// \brief The total length of the line up to and including this token.
-  unsigned TotalLength;
-
-  /// \brief Penalty for inserting a line break before this token.
-  unsigned SplitPenalty;
-
-  std::vector<AnnotatedToken> Children;
-  AnnotatedToken *Parent;
-
-  const AnnotatedToken *getPreviousNoneComment() const {
-    AnnotatedToken *Tok = Parent;
-    while (Tok != NULL && Tok->is(tok::comment))
-      Tok = Tok->Parent;
-    return Tok;
-  }
-};
-
-class AnnotatedLine {
-public:
-  AnnotatedLine(const UnwrappedLine &Line)
-      : First(Line.Tokens.front()), Level(Line.Level),
-        InPPDirective(Line.InPPDirective),
-        MustBeDeclaration(Line.MustBeDeclaration) {
-    assert(!Line.Tokens.empty());
-    AnnotatedToken *Current = &First;
-    for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
-                                                E = Line.Tokens.end();
-         I != E; ++I) {
-      Current->Children.push_back(AnnotatedToken(*I));
-      Current->Children[0].Parent = Current;
-      Current = &Current->Children[0];
-    }
-    Last = Current;
-  }
-  AnnotatedLine(const AnnotatedLine &Other)
-      : First(Other.First), Type(Other.Type), Level(Other.Level),
-        InPPDirective(Other.InPPDirective),
-        MustBeDeclaration(Other.MustBeDeclaration) {
-    Last = &First;
-    while (!Last->Children.empty()) {
-      Last->Children[0].Parent = Last;
-      Last = &Last->Children[0];
-    }
-  }
-
-  AnnotatedToken First;
-  AnnotatedToken *Last;
-
-  LineType Type;
-  unsigned Level;
-  bool InPPDirective;
-  bool MustBeDeclaration;
-};
-
-static prec::Level getPrecedence(const AnnotatedToken &Tok) {
-  return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
-}
-
-bool isBinaryOperator(const AnnotatedToken &Tok) {
-  // Comma is a binary operator, but does not behave as such wrt. formatting.
-  return getPrecedence(Tok) > prec::Comma;
-}
-
 FormatStyle getLLVMStyle() {
   FormatStyle LLVMStyle;
   LLVMStyle.ColumnLimit = 80;
@@ -338,15 +202,6 @@ private:
   tooling::Replacements Replaces;
 };
 
-/// \brief Returns if a token is an Objective-C selector name.
-///
-/// For example, "bar" is a selector name in [foo bar:(4 + 5)].
-static bool isObjCSelectorName(const AnnotatedToken &Tok) {
-  return Tok.is(tok::identifier) && !Tok.Children.empty() &&
-         Tok.Children[0].is(tok::colon) &&
-         Tok.Children[0].Type == TT_ObjCMethodExpr;
-}
-
 class UnwrappedLineFormatter {
 public:
   UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
@@ -813,890 +668,6 @@ private:
   OptimizationParameters Parameters;
 };
 
-/// \brief Determines extra information about the tokens comprising an
-/// \c UnwrappedLine.
-class TokenAnnotator {
-public:
-  TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
-                 AnnotatedLine &Line)
-      : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Line(Line) {
-  }
-
-  /// \brief A parser that gathers additional information about tokens.
-  ///
-  /// The \c TokenAnnotator tries to matches parenthesis and square brakets and
-  /// store a parenthesis levels. It also tries to resolve matching "<" and ">"
-  /// into template parameter lists.
-  class AnnotatingParser {
-  public:
-    AnnotatingParser(AnnotatedToken &RootToken)
-        : CurrentToken(&RootToken), KeywordVirtualFound(false),
-          ColonIsObjCMethodExpr(false), ColonIsForRangeExpr(false) {
-    }
-
-    /// \brief A helper class to manage AnnotatingParser::ColonIsObjCMethodExpr.
-    struct ObjCSelectorRAII {
-      AnnotatingParser &P;
-      bool ColonWasObjCMethodExpr;
-
-      ObjCSelectorRAII(AnnotatingParser &P)
-          : P(P), ColonWasObjCMethodExpr(P.ColonIsObjCMethodExpr) {
-      }
-
-      ~ObjCSelectorRAII() { P.ColonIsObjCMethodExpr = ColonWasObjCMethodExpr; }
-
-      void markStart(AnnotatedToken &Left) {
-        P.ColonIsObjCMethodExpr = true;
-        Left.Type = TT_ObjCMethodExpr;
-      }
-
-      void markEnd(AnnotatedToken &Right) { Right.Type = TT_ObjCMethodExpr; }
-    };
-
-    bool parseAngle() {
-      if (CurrentToken == NULL)
-        return false;
-      AnnotatedToken *Left = CurrentToken->Parent;
-      while (CurrentToken != NULL) {
-        if (CurrentToken->is(tok::greater)) {
-          Left->MatchingParen = CurrentToken;
-          CurrentToken->MatchingParen = Left;
-          CurrentToken->Type = TT_TemplateCloser;
-          next();
-          return true;
-        }
-        if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) ||
-            CurrentToken->is(tok::r_brace))
-          return false;
-        if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) ||
-            CurrentToken->is(tok::question) || CurrentToken->is(tok::colon))
-          return false;
-        if (CurrentToken->is(tok::comma))
-          ++Left->ParameterCount;
-        if (!consumeToken())
-          return false;
-      }
-      return false;
-    }
-
-    bool parseParens(bool LookForDecls = false) {
-      if (CurrentToken == NULL)
-        return false;
-      bool StartsObjCMethodExpr = false;
-      AnnotatedToken *Left = CurrentToken->Parent;
-      if (CurrentToken->is(tok::caret)) {
-        // ^( starts a block.
-        Left->Type = TT_ObjCBlockLParen;
-      } else if (AnnotatedToken *MaybeSel = Left->Parent) {
-        // @selector( starts a selector.
-        if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
-            MaybeSel->Parent->is(tok::at)) {
-          StartsObjCMethodExpr = true;
-        }
-      }
-
-      ObjCSelectorRAII objCSelector(*this);
-      if (StartsObjCMethodExpr)
-        objCSelector.markStart(*Left);
-
-      while (CurrentToken != NULL) {
-        // LookForDecls is set when "if (" has been seen. Check for
-        // 'identifier' '*' 'identifier' followed by not '=' -- this
-        // '*' has to be a binary operator but determineStarAmpUsage() will
-        // categorize it as an unary operator, so set the right type here.
-        if (LookForDecls && !CurrentToken->Children.empty()) {
-          AnnotatedToken &Prev = *CurrentToken->Parent;
-          AnnotatedToken &Next = CurrentToken->Children[0];
-          if (Prev.Parent->is(tok::identifier) &&
-              (Prev.is(tok::star) || Prev.is(tok::amp)) &&
-              CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
-            Prev.Type = TT_BinaryOperator;
-            LookForDecls = false;
-          }
-        }
-
-        if (CurrentToken->is(tok::r_paren)) {
-          Left->MatchingParen = CurrentToken;
-          CurrentToken->MatchingParen = Left;
-
-          if (StartsObjCMethodExpr)
-            objCSelector.markEnd(*CurrentToken);
-
-          next();
-          return true;
-        }
-        if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace))
-          return false;
-        if (CurrentToken->is(tok::comma))
-          ++Left->ParameterCount;
-        if (!consumeToken())
-          return false;
-      }
-      return false;
-    }
-
-    bool parseSquare() {
-      if (!CurrentToken)
-        return false;
-
-      // A '[' could be an index subscript (after an indentifier or after
-      // ')' or ']'), or it could be the start of an Objective-C method
-      // expression.
-      AnnotatedToken *Left = CurrentToken->Parent;
-      bool StartsObjCMethodExpr =
-          !Left->Parent || Left->Parent->is(tok::colon) ||
-          Left->Parent->is(tok::l_square) || Left->Parent->is(tok::l_paren) ||
-          Left->Parent->is(tok::kw_return) || Left->Parent->is(tok::kw_throw) ||
-          getBinOpPrecedence(Left->Parent->FormatTok.Tok.getKind(), true,
-                             true) > prec::Unknown;
-
-      ObjCSelectorRAII objCSelector(*this);
-      if (StartsObjCMethodExpr)
-        objCSelector.markStart(*Left);
-
-      while (CurrentToken != NULL) {
-        if (CurrentToken->is(tok::r_square)) {
-          if (!CurrentToken->Children.empty() &&
-              CurrentToken->Children[0].is(tok::l_paren)) {
-            // An ObjC method call can't be followed by an open parenthesis.
-            // FIXME: Do we incorrectly label ":" with this?
-            StartsObjCMethodExpr = false;
-            Left->Type = TT_Unknown;
-          }
-          if (StartsObjCMethodExpr)
-            objCSelector.markEnd(*CurrentToken);
-          Left->MatchingParen = CurrentToken;
-          CurrentToken->MatchingParen = Left;
-          next();
-          return true;
-        }
-        if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace))
-          return false;
-        if (CurrentToken->is(tok::comma))
-          ++Left->ParameterCount;
-        if (!consumeToken())
-          return false;
-      }
-      return false;
-    }
-
-    bool parseBrace() {
-      // Lines are fine to end with '{'.
-      if (CurrentToken == NULL)
-        return true;
-      AnnotatedToken *Left = CurrentToken->Parent;
-      while (CurrentToken != NULL) {
-        if (CurrentToken->is(tok::r_brace)) {
-          Left->MatchingParen = CurrentToken;
-          CurrentToken->MatchingParen = Left;
-          next();
-          return true;
-        }
-        if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square))
-          return false;
-        if (!consumeToken())
-          return false;
-      }
-      return true;
-    }
-
-    bool parseConditional() {
-      while (CurrentToken != NULL) {
-        if (CurrentToken->is(tok::colon)) {
-          CurrentToken->Type = TT_ConditionalExpr;
-          next();
-          return true;
-        }
-        if (!consumeToken())
-          return false;
-      }
-      return false;
-    }
-
-    bool parseTemplateDeclaration() {
-      if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
-        CurrentToken->Type = TT_TemplateOpener;
-        next();
-        if (!parseAngle())
-          return false;
-        CurrentToken->Parent->ClosesTemplateDeclaration = true;
-        return true;
-      }
-      return false;
-    }
-
-    bool consumeToken() {
-      AnnotatedToken *Tok = CurrentToken;
-      next();
-      switch (Tok->FormatTok.Tok.getKind()) {
-      case tok::plus:
-      case tok::minus:
-        // At the start of the line, +/- specific ObjectiveC method
-        // declarations.
-        if (Tok->Parent == NULL)
-          Tok->Type = TT_ObjCMethodSpecifier;
-        break;
-      case tok::colon:
-        // Colons from ?: are handled in parseConditional().
-        if (Tok->Parent->is(tok::r_paren))
-          Tok->Type = TT_CtorInitializerColon;
-        else if (ColonIsObjCMethodExpr)
-          Tok->Type = TT_ObjCMethodExpr;
-        else if (ColonIsForRangeExpr)
-          Tok->Type = TT_RangeBasedForLoopColon;
-        break;
-      case tok::kw_if:
-      case tok::kw_while:
-        if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
-          next();
-          if (!parseParens(/*LookForDecls=*/ true))
-            return false;
-        }
-        break;
-      case tok::kw_for:
-        ColonIsForRangeExpr = true;
-        next();
-        if (!parseParens())
-          return false;
-        break;
-      case tok::l_paren:
-        if (!parseParens())
-          return false;
-        break;
-      case tok::l_square:
-        if (!parseSquare())
-          return false;
-        break;
-      case tok::l_brace:
-        if (!parseBrace())
-          return false;
-        break;
-      case tok::less:
-        if (parseAngle())
-          Tok->Type = TT_TemplateOpener;
-        else {
-          Tok->Type = TT_BinaryOperator;
-          CurrentToken = Tok;
-          next();
-        }
-        break;
-      case tok::r_paren:
-      case tok::r_square:
-        return false;
-      case tok::r_brace:
-        // Lines can start with '}'.
-        if (Tok->Parent != NULL)
-          return false;
-        break;
-      case tok::greater:
-        Tok->Type = TT_BinaryOperator;
-        break;
-      case tok::kw_operator:
-        if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
-          CurrentToken->Type = TT_OverloadedOperator;
-          next();
-          if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) {
-            CurrentToken->Type = TT_OverloadedOperator;
-            next();
-          }
-        } else {
-          while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) {
-            CurrentToken->Type = TT_OverloadedOperator;
-            next();
-          }
-        }
-        break;
-      case tok::question:
-        parseConditional();
-        break;
-      case tok::kw_template:
-        parseTemplateDeclaration();
-        break;
-      default:
-        break;
-      }
-      return true;
-    }
-
-    void parseIncludeDirective() {
-      next();
-      if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
-        next();
-        while (CurrentToken != NULL) {
-          if (CurrentToken->isNot(tok::comment) ||
-              !CurrentToken->Children.empty())
-            CurrentToken->Type = TT_ImplicitStringLiteral;
-          next();
-        }
-      } else {
-        while (CurrentToken != NULL) {
-          next();
-        }
-      }
-    }
-
-    void parseWarningOrError() {
-      next();
-      // We still want to format the whitespace left of the first token of the
-      // warning or error.
-      next();
-      while (CurrentToken != NULL) {
-        CurrentToken->Type = TT_ImplicitStringLiteral;
-        next();
-      }
-    }
-
-    void parsePreprocessorDirective() {
-      next();
-      if (CurrentToken == NULL)
-        return;
-      // Hashes in the middle of a line can lead to any strange token
-      // sequence.
-      if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
-        return;
-      switch (
-          CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
-      case tok::pp_include:
-      case tok::pp_import:
-        parseIncludeDirective();
-        break;
-      case tok::pp_error:
-      case tok::pp_warning:
-        parseWarningOrError();
-        break;
-      default:
-        break;
-      }
-    }
-
-    LineType parseLine() {
-      int PeriodsAndArrows = 0;
-      bool CanBeBuilderTypeStmt = true;
-      if (CurrentToken->is(tok::hash)) {
-        parsePreprocessorDirective();
-        return LT_PreprocessorDirective;
-      }
-      while (CurrentToken != NULL) {
-        if (CurrentToken->is(tok::kw_virtual))
-          KeywordVirtualFound = true;
-        if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow))
-          ++PeriodsAndArrows;
-        if (getPrecedence(*CurrentToken) > prec::Assignment &&
-            CurrentToken->isNot(tok::less) && CurrentToken->isNot(tok::greater))
-          CanBeBuilderTypeStmt = false;
-        if (!consumeToken())
-          return LT_Invalid;
-      }
-      if (KeywordVirtualFound)
-        return LT_VirtualFunctionDecl;
-
-      // Assume a builder-type call if there are 2 or more "." and "->".
-      if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt)
-        return LT_BuilderTypeCall;
-
-      return LT_Other;
-    }
-
-    void next() {
-      if (CurrentToken != NULL && !CurrentToken->Children.empty())
-        CurrentToken = &CurrentToken->Children[0];
-      else
-        CurrentToken = NULL;
-    }
-
-  private:
-    AnnotatedToken *CurrentToken;
-    bool KeywordVirtualFound;
-    bool ColonIsObjCMethodExpr;
-    bool ColonIsForRangeExpr;
-  };
-
-  void calculateExtraInformation(AnnotatedToken &Current) {
-    Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
-
-    if (Current.FormatTok.MustBreakBefore) {
-      Current.MustBreakBefore = true;
-    } else {
-      if (Current.Type == TT_LineComment) {
-        Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0;
-      } else if ((Current.Parent->is(tok::comment) &&
-                  Current.FormatTok.NewlinesBefore > 0) ||
-                 (Current.is(tok::string_literal) &&
-                  Current.Parent->is(tok::string_literal))) {
-        Current.MustBreakBefore = true;
-      } else {
-        Current.MustBreakBefore = false;
-      }
-    }
-    Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current);
-    if (Current.MustBreakBefore)
-      Current.TotalLength = Current.Parent->TotalLength + Style.ColumnLimit;
-    else
-      Current.TotalLength =
-          Current.Parent->TotalLength + Current.FormatTok.TokenLength +
-          (Current.SpaceRequiredBefore ? 1 : 0);
-    // FIXME: Only calculate this if CanBreakBefore is true once static
-    // initializers etc. are sorted out.
-    Current.SplitPenalty = splitPenalty(Current);
-    if (!Current.Children.empty())
-      calculateExtraInformation(Current.Children[0]);
-  }
-
-  void annotate() {
-    AnnotatingParser Parser(Line.First);
-    Line.Type = Parser.parseLine();
-    if (Line.Type == LT_Invalid)
-      return;
-
-    bool LookForFunctionName = Line.MustBeDeclaration;
-    determineTokenTypes(Line.First, /*IsExpression=*/ false,
-                        LookForFunctionName);
-
-    if (Line.First.Type == TT_ObjCMethodSpecifier)
-      Line.Type = LT_ObjCMethodDecl;
-    else if (Line.First.Type == TT_ObjCDecl)
-      Line.Type = LT_ObjCDecl;
-    else if (Line.First.Type == TT_ObjCProperty)
-      Line.Type = LT_ObjCProperty;
-
-    Line.First.SpaceRequiredBefore = true;
-    Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
-    Line.First.CanBreakBefore = Line.First.MustBreakBefore;
-
-    Line.First.TotalLength = Line.First.FormatTok.TokenLength;
-    if (!Line.First.Children.empty())
-      calculateExtraInformation(Line.First.Children[0]);
-  }
-
-private:
-  /// \brief Calculate the penalty for splitting before \c Tok.
-  unsigned splitPenalty(const AnnotatedToken &Tok) {
-    const AnnotatedToken &Left = *Tok.Parent;
-    const AnnotatedToken &Right = Tok;
-
-    if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace))
-      return 50;
-    if (Left.is(tok::equal) && Right.is(tok::l_brace))
-      return 150;
-    if (Left.is(tok::coloncolon))
-      return 500;
-
-    if (Left.Type == TT_RangeBasedForLoopColon)
-      return 5;
-
-    if (Right.is(tok::arrow) || Right.is(tok::period)) {
-      if (Left.is(tok::r_paren) && Line.Type == LT_BuilderTypeCall)
-        return 5; // Should be smaller than breaking at a nested comma.
-      return 150;
-    }
-
-    // In for-loops, prefer breaking at ',' and ';'.
-    if (Line.First.is(tok::kw_for) &&
-        (Left.isNot(tok::comma) && Left.isNot(tok::semi)))
-      return 20;
-
-    if (Left.is(tok::semi) || Left.is(tok::comma))
-      return 0;
-
-    // In Objective-C method expressions, prefer breaking before "param:" over
-    // breaking after it.
-    if (isObjCSelectorName(Right))
-      return 0;
-    if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
-      return 20;
-
-    if (Left.is(tok::l_paren))
-      return 20;
-    // FIXME: The penalty for a trailing "<" or "[" being higher than the
-    // penalty for a trainling "(" is a temporary workaround until we can
-    // properly avoid breaking in array subscripts or template parameters.
-    if (Left.is(tok::l_square) || Left.Type == TT_TemplateOpener)
-      return 50;
-
-    if (Left.Type == TT_ConditionalExpr)
-      return prec::Assignment;
-    prec::Level Level = getPrecedence(Left);
-
-    if (Level != prec::Unknown)
-      return Level;
-
-    return 3;
-  }
-
-  void determineTokenTypes(AnnotatedToken &Current, bool IsExpression,
-                           bool LookForFunctionName) {
-    if (getPrecedence(Current) == prec::Assignment) {
-      IsExpression = true;
-      AnnotatedToken *Previous = Current.Parent;
-      while (Previous != NULL) {
-        if (Previous->Type == TT_BinaryOperator &&
-            (Previous->is(tok::star) || Previous->is(tok::amp))) {
-          Previous->Type = TT_PointerOrReference;
-        }
-        Previous = Previous->Parent;
-      }
-    }
-    if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) ||
-        (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
-         (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for))))
-      IsExpression = true;
-
-    if (Current.Type == TT_Unknown) {
-      if (LookForFunctionName && Current.is(tok::l_paren)) {
-        findFunctionName(&Current);
-        LookForFunctionName = false;
-      } else if (Current.is(tok::star) || Current.is(tok::amp)) {
-        Current.Type = determineStarAmpUsage(Current, IsExpression);
-      } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
-                 Current.is(tok::caret)) {
-        Current.Type = determinePlusMinusCaretUsage(Current);
-      } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
-        Current.Type = determineIncrementUsage(Current);
-      } else if (Current.is(tok::exclaim)) {
-        Current.Type = TT_UnaryOperator;
-      } else if (isBinaryOperator(Current)) {
-        Current.Type = TT_BinaryOperator;
-      } else if (Current.is(tok::comment)) {
-        std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
-                                            Lex.getLangOpts()));
-        if (StringRef(Data).startswith("//"))
-          Current.Type = TT_LineComment;
-        else
-          Current.Type = TT_BlockComment;
-      } else if (Current.is(tok::r_paren) &&
-                 (Current.Parent->Type == TT_PointerOrReference ||
-                  Current.Parent->Type == TT_TemplateCloser) &&
-                 (Current.Children.empty() ||
-                  (Current.Children[0].isNot(tok::equal) &&
-                   Current.Children[0].isNot(tok::semi) &&
-                   Current.Children[0].isNot(tok::l_brace)))) {
-        // FIXME: We need to get smarter and understand more cases of casts.
-        Current.Type = TT_CastRParen;
-      } else if (Current.is(tok::at) && Current.Children.size()) {
-        switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
-        case tok::objc_interface:
-        case tok::objc_implementation:
-        case tok::objc_protocol:
-          Current.Type = TT_ObjCDecl;
-          break;
-        case tok::objc_property:
-          Current.Type = TT_ObjCProperty;
-          break;
-        default:
-          break;
-        }
-      }
-    }
-
-    if (!Current.Children.empty())
-      determineTokenTypes(Current.Children[0], IsExpression,
-                          LookForFunctionName);
-  }
-
-  /// \brief Starting from \p Current, this searches backwards for an
-  /// identifier which could be the start of a function name and marks it.
-  void findFunctionName(AnnotatedToken *Current) {
-    AnnotatedToken *Parent = Current->Parent;
-    while (Parent != NULL && Parent->Parent != NULL) {
-      if (Parent->is(tok::identifier) &&
-          (Parent->Parent->is(tok::identifier) ||
-           Parent->Parent->Type == TT_PointerOrReference ||
-           Parent->Parent->Type == TT_TemplateCloser)) {
-        Parent->Type = TT_StartOfName;
-        break;
-      }
-      Parent = Parent->Parent;
-    }
-  }
-
-  /// \brief Returns the previous token ignoring comments.
-  const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
-    const AnnotatedToken *PrevToken = Tok.Parent;
-    while (PrevToken != NULL && PrevToken->is(tok::comment))
-      PrevToken = PrevToken->Parent;
-    return PrevToken;
-  }
-
-  /// \brief Returns the next token ignoring comments.
-  const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
-    if (Tok.Children.empty())
-      return NULL;
-    const AnnotatedToken *NextToken = &Tok.Children[0];
-    while (NextToken->is(tok::comment)) {
-      if (NextToken->Children.empty())
-        return NULL;
-      NextToken = &NextToken->Children[0];
-    }
-    return NextToken;
-  }
-
-  /// \brief Return the type of the given token assuming it is * or &.
-  TokenType
-  determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
-    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
-    if (PrevToken == NULL)
-      return TT_UnaryOperator;
-
-    const AnnotatedToken *NextToken = getNextToken(Tok);
-    if (NextToken == NULL)
-      return TT_Unknown;
-
-    if (NextToken->is(tok::l_square) && NextToken->Type != TT_ObjCMethodExpr)
-      return TT_PointerOrReference;
-
-    if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) ||
-        PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) ||
-        PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) ||
-        PrevToken->Type == TT_BinaryOperator ||
-        PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
-      return TT_UnaryOperator;
-
-    if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) ||
-        PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() ||
-        NextToken->is(tok::plus) || NextToken->is(tok::minus) ||
-        NextToken->is(tok::plusplus) || NextToken->is(tok::minusminus) ||
-        NextToken->is(tok::tilde) || NextToken->is(tok::exclaim) ||
-        NextToken->is(tok::l_paren) || NextToken->is(tok::l_square) ||
-        NextToken->is(tok::kw_alignof) || NextToken->is(tok::kw_sizeof))
-      return TT_BinaryOperator;
-
-    if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) ||
-        NextToken->is(tok::greater))
-      return TT_PointerOrReference;
-
-    // It is very unlikely that we are going to find a pointer or reference type
-    // definition on the RHS of an assignment.
-    if (IsExpression)
-      return TT_BinaryOperator;
-
-    return TT_PointerOrReference;
-  }
-
-  TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
-    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
-    if (PrevToken == NULL)
-      return TT_UnaryOperator;
-
-    // Use heuristics to recognize unary operators.
-    if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) ||
-        PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) ||
-        PrevToken->is(tok::question) || PrevToken->is(tok::colon) ||
-        PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) ||
-        PrevToken->is(tok::at) || PrevToken->is(tok::l_brace))
-      return TT_UnaryOperator;
-
-    // There can't be to consecutive binary operators.
-    if (PrevToken->Type == TT_BinaryOperator)
-      return TT_UnaryOperator;
-
-    // Fall back to marking the token as binary operator.
-    return TT_BinaryOperator;
-  }
-
-  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
-  TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
-    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
-    if (PrevToken == NULL)
-      return TT_UnaryOperator;
-    if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) ||
-        PrevToken->is(tok::identifier))
-      return TT_TrailingUnaryOperator;
-
-    return TT_UnaryOperator;
-  }
-
-  bool spaceRequiredBetween(const AnnotatedToken &Left,
-                            const AnnotatedToken &Right) {
-    if (Right.is(tok::hashhash))
-      return Left.is(tok::hash);
-    if (Left.is(tok::hashhash) || Left.is(tok::hash))
-      return Right.is(tok::hash);
-    if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
-      return false;
-    if (Right.is(tok::less) &&
-        (Left.is(tok::kw_template) ||
-         (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
-      return true;
-    if (Left.is(tok::arrow) || Right.is(tok::arrow))
-      return false;
-    if (Left.is(tok::exclaim) || Left.is(tok::tilde))
-      return false;
-    if (Left.is(tok::at) &&
-        (Right.is(tok::identifier) || Right.is(tok::string_literal) ||
-         Right.is(tok::char_constant) || Right.is(tok::numeric_constant) ||
-         Right.is(tok::l_paren) || Right.is(tok::l_brace) ||
-         Right.is(tok::kw_true) || Right.is(tok::kw_false)))
-      return false;
-    if (Left.is(tok::coloncolon))
-      return false;
-    if (Right.is(tok::coloncolon))
-      return Left.isNot(tok::identifier) && Left.isNot(tok::greater);
-    if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
-      return false;
-    if (Right.is(tok::amp) || Right.is(tok::star))
-      return Left.FormatTok.Tok.isLiteral() ||
-             (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
-              !Style.PointerAndReferenceBindToType);
-    if (Left.is(tok::amp) || Left.is(tok::star))
-      return Right.FormatTok.Tok.isLiteral() ||
-             Style.PointerAndReferenceBindToType;
-    if (Right.is(tok::star) && Left.is(tok::l_paren))
-      return false;
-    if (Left.is(tok::l_square) || Right.is(tok::r_square))
-      return false;
-    if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
-      return false;
-    if (Left.is(tok::period) || Right.is(tok::period))
-      return false;
-    if (Left.is(tok::colon))
-      return Left.Type != TT_ObjCMethodExpr;
-    if (Right.is(tok::colon))
-      return Right.Type != TT_ObjCMethodExpr;
-    if (Left.is(tok::l_paren))
-      return false;
-    if (Right.is(tok::l_paren)) {
-      return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
-             Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
-             Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
-             Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
-             Left.is(tok::kw_delete);
-    }
-    if (Left.is(tok::at) &&
-        Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
-      return false;
-    if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
-      return false;
-    return true;
-  }
-
-  bool spaceRequiredBefore(const AnnotatedToken &Tok) {
-    if (Line.Type == LT_ObjCMethodDecl) {
-      if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
-          Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
-        return true;
-      if (Tok.is(tok::colon))
-        return false;
-      if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
-        return true;
-      if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
-        // Don't space between ')' and <id>
-        return false;
-      if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren))
-        // Don't space between ':' and '('
-        return false;
-    }
-    if (Line.Type == LT_ObjCProperty &&
-        (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
-      return false;
-
-    if (Tok.Parent->is(tok::comma))
-      return true;
-    if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
-      return true;
-    if (Tok.Type == TT_OverloadedOperator)
-      return Tok.is(tok::identifier) || Tok.is(tok::kw_new) ||
-             Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool);
-    if (Tok.Parent->Type == TT_OverloadedOperator)
-      return false;
-    if (Tok.is(tok::colon))
-      return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
-             Tok.Type != TT_ObjCMethodExpr;
-    if (Tok.Parent->Type == TT_UnaryOperator ||
-        Tok.Parent->Type == TT_CastRParen)
-      return false;
-    if (Tok.Type == TT_UnaryOperator)
-      return Tok.Parent->isNot(tok::l_paren) &&
-             Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) &&
-             (Tok.Parent->isNot(tok::colon) ||
-              Tok.Parent->Type != TT_ObjCMethodExpr);
-    if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
-      return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
-             TT_TemplateCloser && Style.SplitTemplateClosingGreater;
-    }
-    if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
-      return true;
-    if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
-      return false;
-    if (Tok.is(tok::less) && Line.First.is(tok::hash))
-      return true;
-    if (Tok.Type == TT_TrailingUnaryOperator)
-      return false;
-    return spaceRequiredBetween(*Tok.Parent, Tok);
-  }
-
-  bool canBreakBefore(const AnnotatedToken &Right) {
-    const AnnotatedToken &Left = *Right.Parent;
-    if (Line.Type == LT_ObjCMethodDecl) {
-      if (Right.is(tok::identifier) && !Right.Children.empty() &&
-          Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
-        return true;
-      if (Right.is(tok::identifier) && Left.is(tok::l_paren) &&
-          Left.Parent->is(tok::colon))
-        // Don't break this identifier as ':' or identifier
-        // before it will break.
-        return false;
-      if (Right.is(tok::colon) && Left.is(tok::identifier) &&
-          Left.CanBreakBefore)
-        // Don't break at ':' if identifier before it can beak.
-        return false;
-    }
-    if (Right.Type == TT_StartOfName && Style.AllowReturnTypeOnItsOwnLine)
-      return true;
-    if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
-      return false;
-    if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
-      return true;
-    if (isObjCSelectorName(Right))
-      return true;
-    if (Left.ClosesTemplateDeclaration)
-      return true;
-    if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
-      return true;
-    if (Left.Type == TT_RangeBasedForLoopColon)
-      return true;
-    if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
-        Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
-        Left.is(tok::question))
-      return false;
-    if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
-      return false;
-
-    if (Right.Type == TT_LineComment)
-      // We rely on MustBreakBefore being set correctly here as we should not
-      // change the "binding" behavior of a comment.
-      return false;
-
-    // Allow breaking after a trailing 'const', e.g. after a method declaration,
-    // unless it is follow by ';', '{' or '='.
-    if (Left.is(tok::kw_const) && Left.Parent != NULL &&
-        Left.Parent->is(tok::r_paren))
-      return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) &&
-             Right.isNot(tok::equal);
-
-    // We only break before r_brace if there was a corresponding break before
-    // the l_brace, which is tracked by BreakBeforeClosingBrace.
-    if (Right.is(tok::r_brace))
-      return false;
-
-    if (Right.is(tok::r_paren) || Right.is(tok::greater))
-      return false;
-    return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
-           Left.is(tok::comma) || Right.is(tok::lessless) ||
-           Right.is(tok::arrow) || Right.is(tok::period) ||
-           Right.is(tok::colon) || Left.is(tok::coloncolon) ||
-           Left.is(tok::semi) || Left.is(tok::l_brace) ||
-           (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
-            Right.is(tok::identifier)) ||
-           (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
-           (Left.is(tok::l_square) && !Right.is(tok::r_square));
-  }
-
-  FormatStyle Style;
-  SourceManager &SourceMgr;
-  Lexer &Lex;
-  AnnotatedLine &Line;
-};
-
 class LexerBasedFormatTokenSource : public FormatTokenSource {
 public:
   LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)

Added: cfe/trunk/lib/Format/TokenAnnotator.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=173830&view=auto
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.cpp (added)
+++ cfe/trunk/lib/Format/TokenAnnotator.cpp Tue Jan 29 15:01:14 2013
@@ -0,0 +1,875 @@
+//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a token annotator, i.e. creates
+/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
+///
+//===----------------------------------------------------------------------===//
+
+#include "TokenAnnotator.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+
+namespace clang {
+namespace format {
+
+/// \brief Returns if a token is an Objective-C selector name.
+///
+/// For example, "bar" is a selector name in [foo bar:(4 + 5)].
+static bool isObjCSelectorName(const AnnotatedToken &Tok) {
+  return Tok.is(tok::identifier) && !Tok.Children.empty() &&
+         Tok.Children[0].is(tok::colon) &&
+         Tok.Children[0].Type == TT_ObjCMethodExpr;
+}
+
+static bool isBinaryOperator(const AnnotatedToken &Tok) {
+  // Comma is a binary operator, but does not behave as such wrt. formatting.
+  return getPrecedence(Tok) > prec::Comma;
+}
+
+/// \brief A parser that gathers additional information about tokens.
+///
+/// The \c TokenAnnotator tries to match parentheses and square brackets and
+/// store parenthesis levels. It also tries to resolve matching "<" and ">"
+/// into template parameter lists.
+class AnnotatingParser {
+public:
+  AnnotatingParser(AnnotatedToken &RootToken)
+      : CurrentToken(&RootToken), KeywordVirtualFound(false),
+        ColonIsObjCMethodExpr(false), ColonIsForRangeExpr(false) {
+  }
+
+  /// \brief A helper class to manage AnnotatingParser::ColonIsObjCMethodExpr.
+  struct ObjCSelectorRAII {
+    AnnotatingParser &P;
+    bool ColonWasObjCMethodExpr;
+
+    ObjCSelectorRAII(AnnotatingParser &P)
+        : P(P), ColonWasObjCMethodExpr(P.ColonIsObjCMethodExpr) {
+    }
+
+    ~ObjCSelectorRAII() { P.ColonIsObjCMethodExpr = ColonWasObjCMethodExpr; }
+
+    void markStart(AnnotatedToken &Left) {
+      P.ColonIsObjCMethodExpr = true;
+      Left.Type = TT_ObjCMethodExpr;
+    }
+
+    void markEnd(AnnotatedToken &Right) { Right.Type = TT_ObjCMethodExpr; }
+  };
+
+  bool parseAngle() {
+    if (CurrentToken == NULL)
+      return false;
+    AnnotatedToken *Left = CurrentToken->Parent;
+    while (CurrentToken != NULL) {
+      if (CurrentToken->is(tok::greater)) {
+        Left->MatchingParen = CurrentToken;
+        CurrentToken->MatchingParen = Left;
+        CurrentToken->Type = TT_TemplateCloser;
+        next();
+        return true;
+      }
+      if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square) ||
+          CurrentToken->is(tok::r_brace))
+        return false;
+      if (CurrentToken->is(tok::pipepipe) || CurrentToken->is(tok::ampamp) ||
+          CurrentToken->is(tok::question) || CurrentToken->is(tok::colon))
+        return false;
+      if (CurrentToken->is(tok::comma))
+        ++Left->ParameterCount;
+      if (!consumeToken())
+        return false;
+    }
+    return false;
+  }
+
+  bool parseParens(bool LookForDecls = false) {
+    if (CurrentToken == NULL)
+      return false;
+    bool StartsObjCMethodExpr = false;
+    AnnotatedToken *Left = CurrentToken->Parent;
+    if (CurrentToken->is(tok::caret)) {
+      // ^( starts a block.
+      Left->Type = TT_ObjCBlockLParen;
+    } else if (AnnotatedToken *MaybeSel = Left->Parent) {
+      // @selector( starts a selector.
+      if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
+          MaybeSel->Parent->is(tok::at)) {
+        StartsObjCMethodExpr = true;
+      }
+    }
+
+    ObjCSelectorRAII objCSelector(*this);
+    if (StartsObjCMethodExpr)
+      objCSelector.markStart(*Left);
+
+    while (CurrentToken != NULL) {
+      // LookForDecls is set when "if (" has been seen. Check for
+      // 'identifier' '*' 'identifier' followed by not '=' -- this
+      // '*' has to be a binary operator but determineStarAmpUsage() will
+      // categorize it as a unary operator, so set the right type here.
+      if (LookForDecls && !CurrentToken->Children.empty()) {
+        AnnotatedToken &Prev = *CurrentToken->Parent;
+        AnnotatedToken &Next = CurrentToken->Children[0];
+        if (Prev.Parent->is(tok::identifier) &&
+            (Prev.is(tok::star) || Prev.is(tok::amp)) &&
+            CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
+          Prev.Type = TT_BinaryOperator;
+          LookForDecls = false;
+        }
+      }
+
+      if (CurrentToken->is(tok::r_paren)) {
+        Left->MatchingParen = CurrentToken;
+        CurrentToken->MatchingParen = Left;
+
+        if (StartsObjCMethodExpr)
+          objCSelector.markEnd(*CurrentToken);
+
+        next();
+        return true;
+      }
+      if (CurrentToken->is(tok::r_square) || CurrentToken->is(tok::r_brace))
+        return false;
+      if (CurrentToken->is(tok::comma))
+        ++Left->ParameterCount;
+      if (!consumeToken())
+        return false;
+    }
+    return false;
+  }
+
+  bool parseSquare() {
+    if (!CurrentToken)
+      return false;
+
+    // A '[' could be an index subscript (after an identifier or after
+    // ')' or ']'), or it could be the start of an Objective-C method
+    // expression.
+    AnnotatedToken *Left = CurrentToken->Parent;
+    bool StartsObjCMethodExpr =
+        !Left->Parent || Left->Parent->is(tok::colon) ||
+        Left->Parent->is(tok::l_square) || Left->Parent->is(tok::l_paren) ||
+        Left->Parent->is(tok::kw_return) || Left->Parent->is(tok::kw_throw) ||
+        getBinOpPrecedence(Left->Parent->FormatTok.Tok.getKind(), true, true) >
+        prec::Unknown;
+
+    ObjCSelectorRAII objCSelector(*this);
+    if (StartsObjCMethodExpr)
+      objCSelector.markStart(*Left);
+
+    while (CurrentToken != NULL) {
+      if (CurrentToken->is(tok::r_square)) {
+        if (!CurrentToken->Children.empty() &&
+            CurrentToken->Children[0].is(tok::l_paren)) {
+          // An ObjC method call can't be followed by an open parenthesis.
+          // FIXME: Do we incorrectly label ":" with this?
+          StartsObjCMethodExpr = false;
+          Left->Type = TT_Unknown;
+        }
+        if (StartsObjCMethodExpr)
+          objCSelector.markEnd(*CurrentToken);
+        Left->MatchingParen = CurrentToken;
+        CurrentToken->MatchingParen = Left;
+        next();
+        return true;
+      }
+      if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_brace))
+        return false;
+      if (CurrentToken->is(tok::comma))
+        ++Left->ParameterCount;
+      if (!consumeToken())
+        return false;
+    }
+    return false;
+  }
+
+  bool parseBrace() {
+    // Lines are fine to end with '{'.
+    if (CurrentToken == NULL)
+      return true;
+    AnnotatedToken *Left = CurrentToken->Parent;
+    while (CurrentToken != NULL) {
+      if (CurrentToken->is(tok::r_brace)) {
+        Left->MatchingParen = CurrentToken;
+        CurrentToken->MatchingParen = Left;
+        next();
+        return true;
+      }
+      if (CurrentToken->is(tok::r_paren) || CurrentToken->is(tok::r_square))
+        return false;
+      if (!consumeToken())
+        return false;
+    }
+    return true;
+  }
+
+  bool parseConditional() {
+    while (CurrentToken != NULL) {
+      if (CurrentToken->is(tok::colon)) {
+        CurrentToken->Type = TT_ConditionalExpr;
+        next();
+        return true;
+      }
+      if (!consumeToken())
+        return false;
+    }
+    return false;
+  }
+
+  bool parseTemplateDeclaration() {
+    if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
+      CurrentToken->Type = TT_TemplateOpener;
+      next();
+      if (!parseAngle())
+        return false;
+      CurrentToken->Parent->ClosesTemplateDeclaration = true;
+      return true;
+    }
+    return false;
+  }
+
+  bool consumeToken() {
+    AnnotatedToken *Tok = CurrentToken;
+    next();
+    switch (Tok->FormatTok.Tok.getKind()) {
+    case tok::plus:
+    case tok::minus:
+      // At the start of the line, +/- specify Objective-C method
+      // declarations.
+      if (Tok->Parent == NULL)
+        Tok->Type = TT_ObjCMethodSpecifier;
+      break;
+    case tok::colon:
+      // Colons from ?: are handled in parseConditional().
+      if (Tok->Parent->is(tok::r_paren))
+        Tok->Type = TT_CtorInitializerColon;
+      else if (ColonIsObjCMethodExpr)
+        Tok->Type = TT_ObjCMethodExpr;
+      else if (ColonIsForRangeExpr)
+        Tok->Type = TT_RangeBasedForLoopColon;
+      break;
+    case tok::kw_if:
+    case tok::kw_while:
+      if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
+        next();
+        if (!parseParens(/*LookForDecls=*/ true))
+          return false;
+      }
+      break;
+    case tok::kw_for:
+      ColonIsForRangeExpr = true;
+      next();
+      if (!parseParens())
+        return false;
+      break;
+    case tok::l_paren:
+      if (!parseParens())
+        return false;
+      break;
+    case tok::l_square:
+      if (!parseSquare())
+        return false;
+      break;
+    case tok::l_brace:
+      if (!parseBrace())
+        return false;
+      break;
+    case tok::less:
+      if (parseAngle())
+        Tok->Type = TT_TemplateOpener;
+      else {
+        Tok->Type = TT_BinaryOperator;
+        CurrentToken = Tok;
+        next();
+      }
+      break;
+    case tok::r_paren:
+    case tok::r_square:
+      return false;
+    case tok::r_brace:
+      // Lines can start with '}'.
+      if (Tok->Parent != NULL)
+        return false;
+      break;
+    case tok::greater:
+      Tok->Type = TT_BinaryOperator;
+      break;
+    case tok::kw_operator:
+      if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
+        CurrentToken->Type = TT_OverloadedOperator;
+        next();
+        if (CurrentToken != NULL && CurrentToken->is(tok::r_paren)) {
+          CurrentToken->Type = TT_OverloadedOperator;
+          next();
+        }
+      } else {
+        while (CurrentToken != NULL && CurrentToken->isNot(tok::l_paren)) {
+          CurrentToken->Type = TT_OverloadedOperator;
+          next();
+        }
+      }
+      break;
+    case tok::question:
+      parseConditional();
+      break;
+    case tok::kw_template:
+      parseTemplateDeclaration();
+      break;
+    default:
+      break;
+    }
+    return true;
+  }
+
+  void parseIncludeDirective() {
+    next();
+    if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
+      next();
+      while (CurrentToken != NULL) {
+        if (CurrentToken->isNot(tok::comment) ||
+            !CurrentToken->Children.empty())
+          CurrentToken->Type = TT_ImplicitStringLiteral;
+        next();
+      }
+    } else {
+      while (CurrentToken != NULL) {
+        next();
+      }
+    }
+  }
+
+  void parseWarningOrError() {
+    next();
+    // We still want to format the whitespace left of the first token of the
+    // warning or error.
+    next();
+    while (CurrentToken != NULL) {
+      CurrentToken->Type = TT_ImplicitStringLiteral;
+      next();
+    }
+  }
+
+  void parsePreprocessorDirective() {
+    next();
+    if (CurrentToken == NULL)
+      return;
+    // Hashes in the middle of a line can lead to any strange token
+    // sequence.
+    if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
+      return;
+    switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
+    case tok::pp_include:
+    case tok::pp_import:
+      parseIncludeDirective();
+      break;
+    case tok::pp_error:
+    case tok::pp_warning:
+      parseWarningOrError();
+      break;
+    default:
+      break;
+    }
+  }
+
+  LineType parseLine() {
+    int PeriodsAndArrows = 0;
+    bool CanBeBuilderTypeStmt = true;
+    if (CurrentToken->is(tok::hash)) {
+      parsePreprocessorDirective();
+      return LT_PreprocessorDirective;
+    }
+    while (CurrentToken != NULL) {
+      if (CurrentToken->is(tok::kw_virtual))
+        KeywordVirtualFound = true;
+      if (CurrentToken->is(tok::period) || CurrentToken->is(tok::arrow))
+        ++PeriodsAndArrows;
+      if (getPrecedence(*CurrentToken) > prec::Assignment &&
+          CurrentToken->isNot(tok::less) && CurrentToken->isNot(tok::greater))
+        CanBeBuilderTypeStmt = false;
+      if (!consumeToken())
+        return LT_Invalid;
+    }
+    if (KeywordVirtualFound)
+      return LT_VirtualFunctionDecl;
+
+    // Assume a builder-type call if there are 2 or more "." and "->".
+    if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt)
+      return LT_BuilderTypeCall;
+
+    return LT_Other;
+  }
+
+  void next() {
+    if (CurrentToken != NULL && !CurrentToken->Children.empty())
+      CurrentToken = &CurrentToken->Children[0];
+    else
+      CurrentToken = NULL;
+  }
+
+private:
+  AnnotatedToken *CurrentToken;
+  bool KeywordVirtualFound;
+  bool ColonIsObjCMethodExpr;
+  bool ColonIsForRangeExpr;
+};
+
+void TokenAnnotator::annotate() {
+  AnnotatingParser Parser(Line.First);
+  Line.Type = Parser.parseLine();
+  if (Line.Type == LT_Invalid)
+    return;
+
+  bool LookForFunctionName = Line.MustBeDeclaration;
+  determineTokenTypes(Line.First, /*IsExpression=*/ false, LookForFunctionName);
+
+  if (Line.First.Type == TT_ObjCMethodSpecifier)
+    Line.Type = LT_ObjCMethodDecl;
+  else if (Line.First.Type == TT_ObjCDecl)
+    Line.Type = LT_ObjCDecl;
+  else if (Line.First.Type == TT_ObjCProperty)
+    Line.Type = LT_ObjCProperty;
+
+  Line.First.SpaceRequiredBefore = true;
+  Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
+  Line.First.CanBreakBefore = Line.First.MustBreakBefore;
+
+  Line.First.TotalLength = Line.First.FormatTok.TokenLength;
+  if (!Line.First.Children.empty())
+    calculateExtraInformation(Line.First.Children[0]);
+}
+
+void TokenAnnotator::calculateExtraInformation(AnnotatedToken &Current) {
+  Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
+
+  if (Current.FormatTok.MustBreakBefore) {
+    Current.MustBreakBefore = true;
+  } else {
+    if (Current.Type == TT_LineComment) {
+      Current.MustBreakBefore = Current.FormatTok.NewlinesBefore > 0;
+    } else if ((Current.Parent->is(tok::comment) &&
+                Current.FormatTok.NewlinesBefore > 0) ||
+               (Current.is(tok::string_literal) &&
+                Current.Parent->is(tok::string_literal))) {
+      Current.MustBreakBefore = true;
+    } else {
+      Current.MustBreakBefore = false;
+    }
+  }
+  Current.CanBreakBefore = Current.MustBreakBefore || canBreakBefore(Current);
+  if (Current.MustBreakBefore)
+    Current.TotalLength = Current.Parent->TotalLength + Style.ColumnLimit;
+  else
+    Current.TotalLength =
+        Current.Parent->TotalLength + Current.FormatTok.TokenLength +
+        (Current.SpaceRequiredBefore ? 1 : 0);
+  // FIXME: Only calculate this if CanBreakBefore is true once static
+  // initializers etc. are sorted out.
+  Current.SplitPenalty = splitPenalty(Current);
+  if (!Current.Children.empty())
+    calculateExtraInformation(Current.Children[0]);
+}
+
+unsigned TokenAnnotator::splitPenalty(const AnnotatedToken &Tok) {
+  const AnnotatedToken &Left = *Tok.Parent;
+  const AnnotatedToken &Right = Tok;
+
+  if (Left.is(tok::l_brace) && Right.isNot(tok::l_brace))
+    return 50;
+  if (Left.is(tok::equal) && Right.is(tok::l_brace))
+    return 150;
+  if (Left.is(tok::coloncolon))
+    return 500;
+
+  if (Left.Type == TT_RangeBasedForLoopColon)
+    return 5;
+
+  if (Right.is(tok::arrow) || Right.is(tok::period)) {
+    if (Left.is(tok::r_paren) && Line.Type == LT_BuilderTypeCall)
+      return 5; // Should be smaller than breaking at a nested comma.
+    return 150;
+  }
+
+  // In for-loops, prefer breaking at ',' and ';'.
+  if (Line.First.is(tok::kw_for) &&
+      (Left.isNot(tok::comma) && Left.isNot(tok::semi)))
+    return 20;
+
+  if (Left.is(tok::semi) || Left.is(tok::comma))
+    return 0;
+
+  // In Objective-C method expressions, prefer breaking before "param:" over
+  // breaking after it.
+  if (isObjCSelectorName(Right))
+    return 0;
+  if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
+    return 20;
+
+  if (Left.is(tok::l_paren))
+    return 20;
+  // FIXME: The penalty for a trailing "<" or "[" being higher than the
+  // penalty for a trailing "(" is a temporary workaround until we can
+  // properly avoid breaking in array subscripts or template parameters.
+  if (Left.is(tok::l_square) || Left.Type == TT_TemplateOpener)
+    return 50;
+
+  if (Left.Type == TT_ConditionalExpr)
+    return prec::Assignment;
+  prec::Level Level = getPrecedence(Left);
+
+  if (Level != prec::Unknown)
+    return Level;
+
+  return 3;
+}
+
+void TokenAnnotator::determineTokenTypes(
+    AnnotatedToken &Current, bool IsExpression, bool LookForFunctionName) {
+  if (getPrecedence(Current) == prec::Assignment) {
+    IsExpression = true;
+    AnnotatedToken *Previous = Current.Parent;
+    while (Previous != NULL) {
+      if (Previous->Type == TT_BinaryOperator &&
+          (Previous->is(tok::star) || Previous->is(tok::amp))) {
+        Previous->Type = TT_PointerOrReference;
+      }
+      Previous = Previous->Parent;
+    }
+  }
+  if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) ||
+      (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
+       (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for))))
+    IsExpression = true;
+
+  if (Current.Type == TT_Unknown) {
+    if (LookForFunctionName && Current.is(tok::l_paren)) {
+      findFunctionName(&Current);
+      LookForFunctionName = false;
+    } else if (Current.is(tok::star) || Current.is(tok::amp)) {
+      Current.Type = determineStarAmpUsage(Current, IsExpression);
+    } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
+               Current.is(tok::caret)) {
+      Current.Type = determinePlusMinusCaretUsage(Current);
+    } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
+      Current.Type = determineIncrementUsage(Current);
+    } else if (Current.is(tok::exclaim)) {
+      Current.Type = TT_UnaryOperator;
+    } else if (isBinaryOperator(Current)) {
+      Current.Type = TT_BinaryOperator;
+    } else if (Current.is(tok::comment)) {
+      std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
+                                          Lex.getLangOpts()));
+      if (StringRef(Data).startswith("//"))
+        Current.Type = TT_LineComment;
+      else
+        Current.Type = TT_BlockComment;
+    } else if (Current.is(tok::r_paren) &&
+               (Current.Parent->Type == TT_PointerOrReference ||
+                Current.Parent->Type == TT_TemplateCloser) &&
+               (Current.Children.empty() ||
+                (Current.Children[0].isNot(tok::equal) &&
+                 Current.Children[0].isNot(tok::semi) &&
+                 Current.Children[0].isNot(tok::l_brace)))) {
+      // FIXME: We need to get smarter and understand more cases of casts.
+      Current.Type = TT_CastRParen;
+    } else if (Current.is(tok::at) && Current.Children.size()) {
+      switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
+      case tok::objc_interface:
+      case tok::objc_implementation:
+      case tok::objc_protocol:
+        Current.Type = TT_ObjCDecl;
+        break;
+      case tok::objc_property:
+        Current.Type = TT_ObjCProperty;
+        break;
+      default:
+        break;
+      }
+    }
+  }
+
+  if (!Current.Children.empty())
+    determineTokenTypes(Current.Children[0], IsExpression, LookForFunctionName);
+}
+
+void TokenAnnotator::findFunctionName(AnnotatedToken *Current) {
+  AnnotatedToken *Parent = Current->Parent;
+  while (Parent != NULL && Parent->Parent != NULL) {
+    if (Parent->is(tok::identifier) &&
+        (Parent->Parent->is(tok::identifier) || Parent->Parent->Type ==
+         TT_PointerOrReference || Parent->Parent->Type == TT_TemplateCloser)) {
+      Parent->Type = TT_StartOfName;
+      break;
+    }
+    Parent = Parent->Parent;
+  }
+}
+
+TokenType TokenAnnotator::determineStarAmpUsage(const AnnotatedToken &Tok,
+                                                bool IsExpression) {
+  const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+  if (PrevToken == NULL)
+    return TT_UnaryOperator;
+
+  const AnnotatedToken *NextToken = getNextToken(Tok);
+  if (NextToken == NULL)
+    return TT_Unknown;
+
+  if (NextToken->is(tok::l_square) && NextToken->Type != TT_ObjCMethodExpr)
+    return TT_PointerOrReference;
+
+  if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) ||
+      PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) ||
+      PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) ||
+      PrevToken->Type == TT_BinaryOperator ||
+      PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
+    return TT_UnaryOperator;
+
+  if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) ||
+      PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() ||
+      NextToken->is(tok::plus) || NextToken->is(tok::minus) ||
+      NextToken->is(tok::plusplus) || NextToken->is(tok::minusminus) ||
+      NextToken->is(tok::tilde) || NextToken->is(tok::exclaim) ||
+      NextToken->is(tok::l_paren) || NextToken->is(tok::l_square) ||
+      NextToken->is(tok::kw_alignof) || NextToken->is(tok::kw_sizeof))
+    return TT_BinaryOperator;
+
+  if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) ||
+      NextToken->is(tok::greater))
+    return TT_PointerOrReference;
+
+  // It is very unlikely that we are going to find a pointer or reference type
+  // definition on the RHS of an assignment.
+  if (IsExpression)
+    return TT_BinaryOperator;
+
+  return TT_PointerOrReference;
+}
+
+TokenType
+TokenAnnotator::determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
+  const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+  if (PrevToken == NULL)
+    return TT_UnaryOperator;
+
+  // Use heuristics to recognize unary operators.
+  if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) ||
+      PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) ||
+      PrevToken->is(tok::question) || PrevToken->is(tok::colon) ||
+      PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) ||
+      PrevToken->is(tok::at) || PrevToken->is(tok::l_brace))
+    return TT_UnaryOperator;
+
+  // There can't be two consecutive binary operators.
+  if (PrevToken->Type == TT_BinaryOperator)
+    return TT_UnaryOperator;
+
+  // Fall back to marking the token as binary operator.
+  return TT_BinaryOperator;
+}
+
+TokenType TokenAnnotator::determineIncrementUsage(const AnnotatedToken &Tok) {
+  const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+  if (PrevToken == NULL)
+    return TT_UnaryOperator;
+  if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) ||
+      PrevToken->is(tok::identifier))
+    return TT_TrailingUnaryOperator;
+
+  return TT_UnaryOperator;
+}
+
+bool TokenAnnotator::spaceRequiredBetween(const AnnotatedToken &Left,
+                                          const AnnotatedToken &Right) {
+  if (Right.is(tok::hashhash))
+    return Left.is(tok::hash);
+  if (Left.is(tok::hashhash) || Left.is(tok::hash))
+    return Right.is(tok::hash);
+  if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
+    return false;
+  if (Right.is(tok::less) &&
+      (Left.is(tok::kw_template) ||
+       (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
+    return true;
+  if (Left.is(tok::arrow) || Right.is(tok::arrow))
+    return false;
+  if (Left.is(tok::exclaim) || Left.is(tok::tilde))
+    return false;
+  if (Left.is(tok::at) &&
+      (Right.is(tok::identifier) || Right.is(tok::string_literal) ||
+       Right.is(tok::char_constant) || Right.is(tok::numeric_constant) ||
+       Right.is(tok::l_paren) || Right.is(tok::l_brace) ||
+       Right.is(tok::kw_true) || Right.is(tok::kw_false)))
+    return false;
+  if (Left.is(tok::coloncolon))
+    return false;
+  if (Right.is(tok::coloncolon))
+    return Left.isNot(tok::identifier) && Left.isNot(tok::greater);
+  if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
+    return false;
+  if (Right.is(tok::amp) || Right.is(tok::star))
+    return Left.FormatTok.Tok.isLiteral() ||
+           (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
+            !Style.PointerAndReferenceBindToType);
+  if (Left.is(tok::amp) || Left.is(tok::star))
+    return Right.FormatTok.Tok.isLiteral() ||
+           Style.PointerAndReferenceBindToType;
+  if (Right.is(tok::star) && Left.is(tok::l_paren))
+    return false; // NOTE: never reached; Right == '*' already returned above.
+  if (Left.is(tok::l_square) || Right.is(tok::r_square))
+    return false;
+  if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
+    return false;
+  if (Left.is(tok::period) || Right.is(tok::period))
+    return false;
+  if (Left.is(tok::colon))
+    return Left.Type != TT_ObjCMethodExpr;
+  if (Right.is(tok::colon))
+    return Right.Type != TT_ObjCMethodExpr;
+  if (Left.is(tok::l_paren))
+    return false;
+  if (Right.is(tok::l_paren)) {
+    return Line.Type == LT_ObjCDecl || Left.is(tok::kw_if) ||
+           Left.is(tok::kw_for) || Left.is(tok::kw_while) ||
+           Left.is(tok::kw_switch) || Left.is(tok::kw_return) ||
+           Left.is(tok::kw_catch) || Left.is(tok::kw_new) ||
+           Left.is(tok::kw_delete);
+  }
+  if (Left.is(tok::at) &&
+      Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
+    return false;
+  if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
+    return false;
+  return true;
+}
+
+bool TokenAnnotator::spaceRequiredBefore(const AnnotatedToken &Tok) {
+  if (Line.Type == LT_ObjCMethodDecl) {
+    if (Tok.is(tok::identifier) && !Tok.Children.empty() &&
+        Tok.Children[0].is(tok::colon) && Tok.Parent->is(tok::identifier))
+      return true;
+    if (Tok.is(tok::colon))
+      return false;
+    if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
+      return true;
+    if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
+      // Don't add a space between ')' and <id>.
+      return false;
+    if (Tok.Parent->is(tok::colon) && Tok.is(tok::l_paren))
+      // Don't add a space between ':' and '('.
+      return false;
+  }
+  if (Line.Type == LT_ObjCProperty &&
+      (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
+    return false;
+
+  if (Tok.Parent->is(tok::comma)) // NOTE: assumes Tok.Parent != NULL.
+    return true;
+  if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
+    return true;
+  if (Tok.Type == TT_OverloadedOperator)
+    return Tok.is(tok::identifier) || Tok.is(tok::kw_new) ||
+           Tok.is(tok::kw_delete) || Tok.is(tok::kw_bool);
+  if (Tok.Parent->Type == TT_OverloadedOperator)
+    return false;
+  if (Tok.is(tok::colon))
+    return Line.First.isNot(tok::kw_case) && !Tok.Children.empty() &&
+           Tok.Type != TT_ObjCMethodExpr;
+  if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
+    return false;
+  if (Tok.Type == TT_UnaryOperator)
+    return Tok.Parent->isNot(tok::l_paren) &&
+           Tok.Parent->isNot(tok::l_square) && Tok.Parent->isNot(tok::at) &&
+           (Tok.Parent->isNot(tok::colon) ||
+            Tok.Parent->Type != TT_ObjCMethodExpr);
+  if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
+    return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
+           TT_TemplateCloser && Style.SplitTemplateClosingGreater;
+  }
+  if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
+    return true;
+  if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
+    return false;
+  if (Tok.is(tok::less) && Line.First.is(tok::hash))
+    return true;
+  if (Tok.Type == TT_TrailingUnaryOperator)
+    return false;
+  return spaceRequiredBetween(*Tok.Parent, Tok); // Generic token-pair rules.
+}
+
+bool TokenAnnotator::canBreakBefore(const AnnotatedToken &Right) {
+  const AnnotatedToken &Left = *Right.Parent;
+  if (Line.Type == LT_ObjCMethodDecl) {
+    if (Right.is(tok::identifier) && !Right.Children.empty() &&
+        Right.Children[0].is(tok::colon) && Left.is(tok::identifier))
+      return true;
+    if (Right.is(tok::identifier) && Left.is(tok::l_paren) &&
+        Left.Parent->is(tok::colon))
+      // Don't break before this identifier as the ':' or the identifier
+      // before it will break.
+      return false;
+    if (Right.is(tok::colon) && Left.is(tok::identifier) && Left.CanBreakBefore)
+      // Don't break at ':' if the identifier before it can break.
+      return false;
+  }
+  if (Right.Type == TT_StartOfName && Style.AllowReturnTypeOnItsOwnLine)
+    return true;
+  if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
+    return false;
+  if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
+    return true;
+  if (isObjCSelectorName(Right))
+    return true;
+  if (Left.ClosesTemplateDeclaration)
+    return true;
+  if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
+    return true;
+  if (Left.Type == TT_RangeBasedForLoopColon)
+    return true;
+  if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
+      Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
+      Left.is(tok::question))
+    return false;
+  if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
+    return false;
+
+  if (Right.Type == TT_LineComment)
+    // We rely on MustBreakBefore being set correctly here as we should not
+    // change the "binding" behavior of a comment.
+    return false;
+
+  // Allow breaking after a trailing 'const', e.g. after a method declaration,
+  // unless it is followed by ';', '{' or '='.
+  if (Left.is(tok::kw_const) && Left.Parent != NULL &&
+      Left.Parent->is(tok::r_paren))
+    return Right.isNot(tok::l_brace) && Right.isNot(tok::semi) &&
+           Right.isNot(tok::equal);
+
+  // We only break before r_brace if there was a corresponding break before
+  // the l_brace, which is tracked by BreakBeforeClosingBrace.
+  if (Right.is(tok::r_brace))
+    return false;
+
+  if (Right.is(tok::r_paren) || Right.is(tok::greater))
+    return false;
+  return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
+         Left.is(tok::comma) || Right.is(tok::lessless) ||
+         Right.is(tok::arrow) || Right.is(tok::period) ||
+         Right.is(tok::colon) || Left.is(tok::coloncolon) ||
+         Left.is(tok::semi) || Left.is(tok::l_brace) ||
+         (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
+          Right.is(tok::identifier)) ||
+         (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
+         (Left.is(tok::l_square) && !Right.is(tok::r_square));
+}
+
+} // namespace format
+} // namespace clang

Added: cfe/trunk/lib/Format/TokenAnnotator.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.h?rev=173830&view=auto
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.h (added)
+++ cfe/trunk/lib/Format/TokenAnnotator.h Tue Jan 29 15:01:14 2013
@@ -0,0 +1,228 @@
+//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a token annotator, i.e. creates
+/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
+#define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
+
+#include "UnwrappedLineParser.h"
+#include "clang/Basic/OperatorPrecedence.h"
+#include "clang/Format/Format.h"
+#include <string>
+
+namespace clang {
+class Lexer;
+class SourceManager;
+
+namespace format {
+
+enum TokenType {
+  TT_BinaryOperator,
+  TT_BlockComment,
+  TT_CastRParen,
+  TT_ConditionalExpr,
+  TT_CtorInitializerColon,
+  TT_ImplicitStringLiteral,
+  TT_LineComment,
+  TT_ObjCBlockLParen,
+  TT_ObjCDecl,
+  TT_ObjCMethodSpecifier,
+  TT_ObjCMethodExpr,
+  TT_ObjCProperty,
+  TT_OverloadedOperator,
+  TT_PointerOrReference,
+  TT_PureVirtualSpecifier,
+  TT_RangeBasedForLoopColon,
+  TT_StartOfName,
+  TT_TemplateCloser,
+  TT_TemplateOpener,
+  TT_TrailingUnaryOperator, // E.g. ++/-- after ')', ']' or an identifier.
+  TT_UnaryOperator,
+  TT_Unknown // The type every AnnotatedToken is constructed with.
+};
+
+enum LineType {
+  LT_Invalid,
+  LT_Other,
+  LT_BuilderTypeCall,
+  LT_PreprocessorDirective,
+  LT_VirtualFunctionDecl, // No line break is allowed directly after '='.
+  LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
+  LT_ObjCMethodDecl,
+  LT_ObjCProperty // An @property line.
+};
+
+class AnnotatedToken {
+public:
+  explicit AnnotatedToken(const FormatToken &FormatTok)
+      : FormatTok(FormatTok), Type(TT_Unknown), SpaceRequiredBefore(false),
+        CanBreakBefore(false), MustBreakBefore(false),
+        ClosesTemplateDeclaration(false), MatchingParen(NULL),
+        ParameterCount(1), Parent(NULL) {
+  }
+
+  bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
+  bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
+
+  bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
+    return FormatTok.Tok.isObjCAtKeyword(Kind);
+  }
+
+  FormatToken FormatTok; // Copy of the token this annotation was built from.
+
+  TokenType Type;
+
+  bool SpaceRequiredBefore;
+  bool CanBreakBefore;
+  bool MustBreakBefore;
+
+  bool ClosesTemplateDeclaration;
+
+  AnnotatedToken *MatchingParen;
+
+  /// \brief Number of parameters, if this is "(", "[" or "<".
+  ///
+  /// This is initialized to 1 as we don't need to distinguish functions with
+  /// 0 parameters from functions with 1 parameter. Thus, we can simply count
+  /// the number of commas.
+  unsigned ParameterCount;
+
+  /// \brief The total length of the line up to and including this token.
+  unsigned TotalLength; // NOTE: not initialized by the constructor.
+
+  /// \brief Penalty for inserting a line break before this token.
+  unsigned SplitPenalty; // NOTE: not initialized by the constructor.
+
+  std::vector<AnnotatedToken> Children;
+  AnnotatedToken *Parent;
+
+  const AnnotatedToken *getPreviousNoneComment() const {
+    AnnotatedToken *Tok = Parent;
+    while (Tok != NULL && Tok->is(tok::comment)) // Skip over comment tokens.
+      Tok = Tok->Parent;
+    return Tok; // NULL if only comments (or nothing) precede this token.
+  }
+};
+
+class AnnotatedLine {
+public:
+  AnnotatedLine(const UnwrappedLine &Line)
+      : First(Line.Tokens.front()), Level(Line.Level),
+        InPPDirective(Line.InPPDirective),
+        MustBeDeclaration(Line.MustBeDeclaration) {
+    assert(!Line.Tokens.empty()); // NOTE: front() was already called above.
+    AnnotatedToken *Current = &First;
+    for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
+                                                E = Line.Tokens.end();
+         I != E; ++I) {
+      Current->Children.push_back(AnnotatedToken(*I));
+      Current->Children[0].Parent = Current;
+      Current = &Current->Children[0];
+    }
+    Last = Current;
+  }
+  AnnotatedLine(const AnnotatedLine &Other)
+      : First(Other.First), Type(Other.Type), Level(Other.Level),
+        InPPDirective(Other.InPPDirective),
+        MustBeDeclaration(Other.MustBeDeclaration) {
+    Last = &First;
+    while (!Last->Children.empty()) {
+      Last->Children[0].Parent = Last; // Re-link to this copy's own tokens.
+      Last = &Last->Children[0];
+    }
+  }
+
+  AnnotatedToken First;
+  AnnotatedToken *Last;
+
+  LineType Type; // NOTE: left unset by the UnwrappedLine constructor.
+  unsigned Level;
+  bool InPPDirective;
+  bool MustBeDeclaration;
+};
+
+inline prec::Level getPrecedence(const AnnotatedToken &Tok) { // By token kind.
+  return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
+}
+
+/// \brief Determines extra information about the tokens comprising an
+/// \c UnwrappedLine.
+class TokenAnnotator {
+public:
+  TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
+                 AnnotatedLine &Line)
+      : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Line(Line) {
+  }
+
+  void annotate();
+  void calculateExtraInformation(AnnotatedToken &Current);
+
+private:
+  /// \brief Calculate the penalty for splitting before \c Tok.
+  unsigned splitPenalty(const AnnotatedToken &Tok);
+
+  void determineTokenTypes(AnnotatedToken &Current, bool IsExpression,
+                           bool LookForFunctionName);
+
+  /// \brief Starting from \p Current, this searches backwards for an
+  /// identifier which could be the start of a function name and marks it.
+  void findFunctionName(AnnotatedToken *Current);
+
+  /// \brief Returns the previous token ignoring comments, or NULL if none.
+  const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
+    const AnnotatedToken *PrevToken = Tok.Parent;
+    while (PrevToken != NULL && PrevToken->is(tok::comment))
+      PrevToken = PrevToken->Parent;
+    return PrevToken;
+  }
+
+  /// \brief Returns the next token ignoring comments, or NULL if none.
+  const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
+    if (Tok.Children.empty())
+      return NULL;
+    const AnnotatedToken *NextToken = &Tok.Children[0];
+    while (NextToken->is(tok::comment)) {
+      if (NextToken->Children.empty())
+        return NULL;
+      NextToken = &NextToken->Children[0];
+    }
+    return NextToken;
+  }
+
+  /// \brief Return the type of the given token assuming it is * or &.
+  TokenType determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression);
+
+  TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok);
+
+  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
+  TokenType determineIncrementUsage(const AnnotatedToken &Tok);
+
+  bool spaceRequiredBetween(const AnnotatedToken &Left,
+                            const AnnotatedToken &Right);
+
+  bool spaceRequiredBefore(const AnnotatedToken &Tok);
+
+  bool canBreakBefore(const AnnotatedToken &Right);
+
+  FormatStyle Style; // Held by value; the remaining members are references.
+  SourceManager &SourceMgr;
+  Lexer &Lex;
+  AnnotatedLine &Line;
+};
+
+
+} // end namespace format
+} // end namespace clang
+
+#endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=173830&r1=173829&r2=173830&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Tue Jan 29 15:01:14 2013
@@ -11,9 +11,6 @@
 /// \brief This file contains the implementation of the UnwrappedLineParser,
 /// which turns a stream of tokens into UnwrappedLines.
 ///
-/// This is EXPERIMENTAL code under heavy development. It is not in a state yet,
-/// where it can be used to format real code.
-///
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "format-parser"

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=173830&r1=173829&r2=173830&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.h Tue Jan 29 15:01:14 2013
@@ -11,9 +11,6 @@
 /// \brief This file contains the declaration of the UnwrappedLineParser,
 /// which turns a stream of tokens into UnwrappedLines.
 ///
-/// This is EXPERIMENTAL code under heavy development. It is not in a state yet,
-/// where it can be used to format real code.
-///
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H





More information about the cfe-commits mailing list