r174307 - Restructuring of token annotation for formatting.

Sun Feb 3 23:21:19 PST 2013

Author: djasper
Date: Mon Feb  4 01:21:18 2013
New Revision: 174307

URL: http://llvm.org/viewvc/llvm-project?rev=174307&view=rev
Log:
Restructuring of token annotation for formatting.

This combines several changes:
* Calculation token type (e.g. for * and &) in the AnnotatingParser.
* Calculate the scope binding strength in the AnnotatingParser.
* Let <> and [] scopes bind stronger than () and {} scopes.
* Add minimal debugging output.

Modified:
    cfe/trunk/include/clang/Format/Format.h
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/lib/Format/TokenAnnotator.cpp
    cfe/trunk/lib/Format/TokenAnnotator.h
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/include/clang/Format/Format.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Format/Format.h?rev=174307&r1=174306&r2=174307&view=diff
==============================================================================

--- cfe/trunk/include/clang/Format/Format.h (original)
+++ cfe/trunk/include/clang/Format/Format.h Mon Feb  4 01:21:18 2013
@@ -32,6 +32,9 @@ struct FormatStyle {
   /// \brief The column limit.
   unsigned ColumnLimit;
 
+  /// \brief The penalty for each character outside of the column limit.
+  unsigned PenaltyExcessCharacter;
+
   /// \brief The maximum number of consecutive empty lines to keep.
   unsigned MaxEmptyLinesToKeep;
 

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=174307&r1=174306&r2=174307&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Mon Feb  4 01:21:18 2013
@@ -47,6 +47,7 @@ FormatStyle getLLVMStyle() {
   LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
   LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
   LLVMStyle.ObjCSpaceBeforeProtocolList = true;
+  LLVMStyle.PenaltyExcessCharacter = 1000000;
   return LLVMStyle;
 }
 
@@ -65,6 +66,7 @@ FormatStyle getGoogleStyle() {
   GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
   GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
   GoogleStyle.ObjCSpaceBeforeProtocolList = false;
+  GoogleStyle.PenaltyExcessCharacter = 1000000;
   return GoogleStyle;
 }
 
@@ -75,11 +77,6 @@ FormatStyle getChromiumStyle() {
   return ChromiumStyle;
 }
 
-struct OptimizationParameters {
-  unsigned PenaltyIndentLevel;
-  unsigned PenaltyExcessCharacter;
-};
-
 /// \brief Manages the whitespaces around tokens and their replacements.
 ///
 /// This includes special handling for certain constructs, e.g. the alignment of
@@ -216,8 +213,6 @@ public:
       : Style(Style), SourceMgr(SourceMgr), Line(Line),
         FirstIndent(FirstIndent), RootToken(RootToken),
         Whitespaces(Whitespaces) {
-    Parameters.PenaltyIndentLevel = 20;
-    Parameters.PenaltyExcessCharacter = 1000000;
   }
 
   /// \brief Formats an \c UnwrappedLine.
@@ -594,15 +589,17 @@ private:
     // Insert start element into queue.
     std::multimap<unsigned, QueueItem> Queue;
     Queue.insert(std::pair<unsigned, QueueItem>(
-        0, QueueItem(InitialState, Edge(false, (const LineState *) 0))));
+        0, QueueItem(InitialState, Edge(false, (const LineState *)0))));
     std::map<LineState, Edge> Seen;
 
     // While not empty, take first element and follow edges.
     while (!Queue.empty()) {
       unsigned Penalty = Queue.begin()->first;
       QueueItem Item = Queue.begin()->second;
-      if (Item.first.NextToken == NULL)
+      if (Item.first.NextToken == NULL) {
+        DEBUG(llvm::errs() << "\n---\nPenalty for line: " << Penalty << "\n");
         break;
+      }
       Queue.erase(Queue.begin());
 
       if (Seen.find(Item.first) != Seen.end())
@@ -626,9 +623,16 @@ private:
     Edge *CurrentEdge = &Queue.begin()->second.second;
     while (CurrentEdge->second != NULL) {
       LineState CurrentState = *CurrentEdge->second;
+      if (CurrentEdge->first) {
+        DEBUG(llvm::errs() << "Penalty for splitting before "
+                           << CurrentState.NextToken->FormatTok.Tok.getName()
+                           << ": " << CurrentState.NextToken->SplitPenalty
+                           << "\n");
+      }
       addTokenToState(CurrentEdge->first, false, CurrentState);
       CurrentEdge = &Seen[*CurrentEdge->second];
     }
+    DEBUG(llvm::errs() << "---\n");
 
     // Return the column after the last token of the solution.
     return Queue.begin()->second.first.Column;
@@ -647,12 +651,11 @@ private:
       return;
     LineState State(OldState);
     if (NewLine)
-      Penalty += Parameters.PenaltyIndentLevel * State.Stack.size() +
-                 State.NextToken->SplitPenalty;
+      Penalty += State.NextToken->SplitPenalty;
     addTokenToState(NewLine, true, State);
     if (State.Column > getColumnLimit()) {
       unsigned ExcessCharacters = State.Column - getColumnLimit();
-      Penalty += Parameters.PenaltyExcessCharacter * ExcessCharacters;
+      Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
     }
     Queue.insert(std::pair<unsigned, QueueItem>(
         Penalty, QueueItem(State, Edge(NewLine, &OldState))));
@@ -673,7 +676,6 @@ private:
     return true;
   }
 
-
   /// \brief Returns \c true, if a line break after \p State is mandatory.
   bool mustBreak(const LineState &State) {
     if (State.NextToken->MustBreakBefore)
@@ -701,8 +703,6 @@ private:
   const unsigned FirstIndent;
   const AnnotatedToken &RootToken;
   WhitespaceManager &Whitespaces;
-
-  OptimizationParameters Parameters;
 };
 
 class LexerBasedFormatTokenSource : public FormatTokenSource {

Modified: cfe/trunk/lib/Format/TokenAnnotator.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=174307&r1=174306&r2=174307&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.cpp (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.cpp Mon Feb  4 01:21:18 2013
@@ -34,6 +34,27 @@ static bool isBinaryOperator(const Annot
   return getPrecedence(Tok) > prec::Comma;
 }
 
+// Returns the previous token ignoring comments.
+static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
+  const AnnotatedToken *PrevToken = Tok.Parent;
+  while (PrevToken != NULL && PrevToken->is(tok::comment))
+    PrevToken = PrevToken->Parent;
+  return PrevToken;
+}
+
+// Returns the next token ignoring comments.
+static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
+  if (Tok.Children.empty())
+    return NULL;
+  const AnnotatedToken *NextToken = &Tok.Children[0];
+  while (NextToken->is(tok::comment)) {
+    if (NextToken->Children.empty())
+      return NULL;
+    NextToken = &NextToken->Children[0];
+  }
+  return NextToken;
+}
+
 /// \brief A parser that gathers additional information about tokens.
 ///
 /// The \c TokenAnnotator tries to matches parenthesis and square brakets and
@@ -41,9 +62,11 @@ static bool isBinaryOperator(const Annot
 /// into template parameter lists.
 class AnnotatingParser {
 public:
-  AnnotatingParser(AnnotatedToken &RootToken)
-      : CurrentToken(&RootToken), KeywordVirtualFound(false),
-        ColonIsObjCMethodExpr(false), ColonIsForRangeExpr(false) {
+  AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line)
+      : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
+        KeywordVirtualFound(false), ColonIsObjCMethodExpr(false),
+        ColonIsForRangeExpr(false), IsExpression(false),
+        LookForFunctionName(Line.MustBeDeclaration), BindingStrength(1) {
   }
 
   /// \brief A helper class to manage AnnotatingParser::ColonIsObjCMethodExpr.
@@ -65,9 +88,22 @@ public:
     void markEnd(AnnotatedToken &Right) { Right.Type = TT_ObjCMethodExpr; }
   };
 
+  struct ScopedBindingStrengthIncrease {
+    AnnotatingParser &P;
+    unsigned Increase;
+
+    ScopedBindingStrengthIncrease(AnnotatingParser &P, unsigned Increase)
+        : P(P), Increase(Increase) {
+      P.BindingStrength += Increase;
+    }
+
+    ~ScopedBindingStrengthIncrease() { P.BindingStrength -= Increase; }
+  };
+
   bool parseAngle() {
     if (CurrentToken == NULL)
       return false;
+    ScopedBindingStrengthIncrease Increase(*this, 10);
     AnnotatedToken *Left = CurrentToken->Parent;
     while (CurrentToken != NULL) {
       if (CurrentToken->is(tok::greater)) {
@@ -94,6 +130,7 @@ public:
   bool parseParens(bool LookForDecls = false) {
     if (CurrentToken == NULL)
       return false;
+    ScopedBindingStrengthIncrease Increase(*this, 1);
     bool StartsObjCMethodExpr = false;
     AnnotatedToken *Left = CurrentToken->Parent;
     if (CurrentToken->is(tok::caret)) {
@@ -150,6 +187,7 @@ public:
   bool parseSquare() {
     if (!CurrentToken)
       return false;
+    ScopedBindingStrengthIncrease Increase(*this, 10);
 
     // A '[' could be an index subscript (after an indentifier or after
     // ')' or ']'), or it could be the start of an Objective-C method
@@ -175,8 +213,12 @@ public:
           StartsObjCMethodExpr = false;
           Left->Type = TT_Unknown;
         }
-        if (StartsObjCMethodExpr)
+        if (StartsObjCMethodExpr) {
           objCSelector.markEnd(*CurrentToken);
+          if (Left->Parent != NULL &&
+              (Left->Parent->is(tok::star) || Left->Parent->is(tok::amp)))
+            Left->Parent->Type = TT_BinaryOperator;
+        }
         Left->MatchingParen = CurrentToken;
         CurrentToken->MatchingParen = Left;
         next();
@@ -196,6 +238,7 @@ public:
     // Lines are fine to end with '{'.
     if (CurrentToken == NULL)
       return true;
+    ScopedBindingStrengthIncrease Increase(*this, 1);
     AnnotatedToken *Left = CurrentToken->Parent;
     while (CurrentToken != NULL) {
       if (CurrentToken->is(tok::r_brace)) {
@@ -411,6 +454,11 @@ public:
   }
 
   void next() {
+    if (CurrentToken != NULL) {
+      determineTokenType(*CurrentToken);
+      CurrentToken->BindingStrength = BindingStrength;
+    }
+
     if (CurrentToken != NULL && !CurrentToken->Children.empty())
       CurrentToken = &CurrentToken->Children[0];
     else
@@ -418,21 +466,181 @@ public:
   }
 
 private:
+  SourceManager &SourceMgr;
+  Lexer &Lex;
+  AnnotatedLine &Line;
   AnnotatedToken *CurrentToken;
   bool KeywordVirtualFound;
   bool ColonIsObjCMethodExpr;
   bool ColonIsForRangeExpr;
+  bool IsExpression;
+  bool LookForFunctionName;
+
+  unsigned BindingStrength;
+
+  void determineTokenType(AnnotatedToken &Current) {
+    if (getPrecedence(Current) == prec::Assignment) {
+      IsExpression = true;
+      AnnotatedToken *Previous = Current.Parent;
+      while (Previous != NULL) {
+        if (Previous->Type == TT_BinaryOperator &&
+            (Previous->is(tok::star) || Previous->is(tok::amp))) {
+          Previous->Type = TT_PointerOrReference;
+        }
+        Previous = Previous->Parent;
+      }
+    }
+    if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) ||
+        (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
+         (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for))))
+      IsExpression = true;
+
+    if (Current.Type == TT_Unknown) {
+      if (LookForFunctionName && Current.is(tok::l_paren)) {
+        findFunctionName(&Current);
+        LookForFunctionName = false;
+      } else if (Current.is(tok::star) || Current.is(tok::amp)) {
+        Current.Type = determineStarAmpUsage(Current, IsExpression);
+      } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
+                 Current.is(tok::caret)) {
+        Current.Type = determinePlusMinusCaretUsage(Current);
+      } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
+        Current.Type = determineIncrementUsage(Current);
+      } else if (Current.is(tok::exclaim)) {
+        Current.Type = TT_UnaryOperator;
+      } else if (isBinaryOperator(Current)) {
+        Current.Type = TT_BinaryOperator;
+      } else if (Current.is(tok::comment)) {
+        std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
+                                            Lex.getLangOpts()));
+        if (StringRef(Data).startswith("//"))
+          Current.Type = TT_LineComment;
+        else
+          Current.Type = TT_BlockComment;
+      } else if (Current.is(tok::r_paren) &&
+                 (Current.Parent->Type == TT_PointerOrReference ||
+                  Current.Parent->Type == TT_TemplateCloser) &&
+                 (Current.Children.empty() ||
+                  (Current.Children[0].isNot(tok::equal) &&
+                   Current.Children[0].isNot(tok::semi) &&
+                   Current.Children[0].isNot(tok::l_brace)))) {
+        // FIXME: We need to get smarter and understand more cases of casts.
+        Current.Type = TT_CastRParen;
+      } else if (Current.is(tok::at) && Current.Children.size()) {
+        switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
+        case tok::objc_interface:
+        case tok::objc_implementation:
+        case tok::objc_protocol:
+          Current.Type = TT_ObjCDecl;
+          break;
+        case tok::objc_property:
+          Current.Type = TT_ObjCProperty;
+          break;
+        default:
+          break;
+        }
+      }
+    }
+  }
+
+  /// \brief Starting from \p Current, this searches backwards for an
+  /// identifier which could be the start of a function name and marks it.
+  void findFunctionName(AnnotatedToken *Current) {
+    AnnotatedToken *Parent = Current->Parent;
+    while (Parent != NULL && Parent->Parent != NULL) {
+      if (Parent->is(tok::identifier) &&
+          (Parent->Parent->is(tok::identifier) ||
+           Parent->Parent->Type == TT_PointerOrReference ||
+           Parent->Parent->Type == TT_TemplateCloser)) {
+        Parent->Type = TT_StartOfName;
+        break;
+      }
+      Parent = Parent->Parent;
+    }
+  }
+
+  /// \brief Return the type of the given token assuming it is * or &.
+  TokenType
+  determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
+    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+    if (PrevToken == NULL)
+      return TT_UnaryOperator;
+
+    const AnnotatedToken *NextToken = getNextToken(Tok);
+    if (NextToken == NULL)
+      return TT_Unknown;
+
+    if (NextToken->is(tok::l_square))
+      return TT_PointerOrReference;
+
+    if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) ||
+        PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) ||
+        PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) ||
+        PrevToken->Type == TT_BinaryOperator ||
+        PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
+      return TT_UnaryOperator;
+
+    if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) ||
+        PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() ||
+        NextToken->is(tok::plus) || NextToken->is(tok::minus) ||
+        NextToken->is(tok::plusplus) || NextToken->is(tok::minusminus) ||
+        NextToken->is(tok::tilde) || NextToken->is(tok::exclaim) ||
+        NextToken->is(tok::l_paren) || NextToken->is(tok::l_square) ||
+        NextToken->is(tok::kw_alignof) || NextToken->is(tok::kw_sizeof))
+      return TT_BinaryOperator;
+
+    if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) ||
+        NextToken->is(tok::greater))
+      return TT_PointerOrReference;
+
+    // It is very unlikely that we are going to find a pointer or reference type
+    // definition on the RHS of an assignment.
+    if (IsExpression)
+      return TT_BinaryOperator;
+
+    return TT_PointerOrReference;
+  }
+
+  TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
+    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+    if (PrevToken == NULL)
+      return TT_UnaryOperator;
+
+    // Use heuristics to recognize unary operators.
+    if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) ||
+        PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) ||
+        PrevToken->is(tok::question) || PrevToken->is(tok::colon) ||
+        PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) ||
+        PrevToken->is(tok::at) || PrevToken->is(tok::l_brace))
+      return TT_UnaryOperator;
+
+    // There can't be to consecutive binary operators.
+    if (PrevToken->Type == TT_BinaryOperator)
+      return TT_UnaryOperator;
+
+    // Fall back to marking the token as binary operator.
+    return TT_BinaryOperator;
+  }
+
+  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
+  TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
+    const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+    if (PrevToken == NULL)
+      return TT_UnaryOperator;
+    if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) ||
+        PrevToken->is(tok::identifier))
+      return TT_TrailingUnaryOperator;
+
+    return TT_UnaryOperator;
+  }
 };
 
 void TokenAnnotator::annotate() {
-  AnnotatingParser Parser(Line.First);
+  AnnotatingParser Parser(SourceMgr, Lex, Line);
   Line.Type = Parser.parseLine();
   if (Line.Type == LT_Invalid)
     return;
 
-  bool LookForFunctionName = Line.MustBeDeclaration;
-  determineTokenTypes(Line.First, /*IsExpression=*/ false, LookForFunctionName);
-
   if (Line.First.Type == TT_ObjCMethodSpecifier)
     Line.Type = LT_ObjCMethodDecl;
   else if (Line.First.Type == TT_ObjCDecl)
@@ -446,10 +654,10 @@ void TokenAnnotator::annotate() {
 
   Line.First.TotalLength = Line.First.FormatTok.TokenLength;
   if (!Line.First.Children.empty())
-    calculateExtraInformation(Line.First.Children[0]);
+    calculateFormattingInformation(Line.First.Children[0]);
 }
 
-void TokenAnnotator::calculateExtraInformation(AnnotatedToken &Current) {
+void TokenAnnotator::calculateFormattingInformation(AnnotatedToken &Current) {
   Current.SpaceRequiredBefore = spaceRequiredBefore(Current);
 
   if (Current.FormatTok.MustBreakBefore) {
@@ -475,9 +683,10 @@ void TokenAnnotator::calculateExtraInfor
         (Current.SpaceRequiredBefore ? 1 : 0);
   // FIXME: Only calculate this if CanBreakBefore is true once static
   // initializers etc. are sorted out.
-  Current.SplitPenalty = splitPenalty(Current);
+  // FIXME: Move magic numbers to a better place.
+  Current.SplitPenalty = 20 * Current.BindingStrength + splitPenalty(Current);
   if (!Current.Children.empty())
-    calculateExtraInformation(Current.Children[0]);
+    calculateFormattingInformation(Current.Children[0]);
 }
 
 unsigned TokenAnnotator::splitPenalty(const AnnotatedToken &Tok) {
@@ -515,14 +724,12 @@ unsigned TokenAnnotator::splitPenalty(co
   if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
     return 20;
 
-  if (Left.is(tok::l_paren))
+  if (Left.is(tok::l_paren) || Left.is(tok::l_square) ||
+      Left.Type == TT_TemplateOpener)
     return 20;
-  // FIXME: The penalty for a trailing "<" or "[" being higher than the
-  // penalty for a trainling "(" is a temporary workaround until we can
-  // properly avoid breaking in array subscripts or template parameters.
-  if (Left.is(tok::l_square) || Left.Type == TT_TemplateOpener)
-    return 50;
 
+  if (Right.is(tok::lessless))
+    return prec::Shift;
   if (Left.Type == TT_ConditionalExpr)
     return prec::Assignment;
   prec::Level Level = getPrecedence(Left);
@@ -533,162 +740,6 @@ unsigned TokenAnnotator::splitPenalty(co
   return 3;
 }
 
-void TokenAnnotator::determineTokenTypes(
-    AnnotatedToken &Current, bool IsExpression, bool LookForFunctionName) {
-  if (getPrecedence(Current) == prec::Assignment) {
-    IsExpression = true;
-    AnnotatedToken *Previous = Current.Parent;
-    while (Previous != NULL) {
-      if (Previous->Type == TT_BinaryOperator &&
-          (Previous->is(tok::star) || Previous->is(tok::amp))) {
-        Previous->Type = TT_PointerOrReference;
-      }
-      Previous = Previous->Parent;
-    }
-  }
-  if (Current.is(tok::kw_return) || Current.is(tok::kw_throw) ||
-      (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
-       (Current.Parent == NULL || Current.Parent->isNot(tok::kw_for))))
-    IsExpression = true;
-
-  if (Current.Type == TT_Unknown) {
-    if (LookForFunctionName && Current.is(tok::l_paren)) {
-      findFunctionName(&Current);
-      LookForFunctionName = false;
-    } else if (Current.is(tok::star) || Current.is(tok::amp)) {
-      Current.Type = determineStarAmpUsage(Current, IsExpression);
-    } else if (Current.is(tok::minus) || Current.is(tok::plus) ||
-               Current.is(tok::caret)) {
-      Current.Type = determinePlusMinusCaretUsage(Current);
-    } else if (Current.is(tok::minusminus) || Current.is(tok::plusplus)) {
-      Current.Type = determineIncrementUsage(Current);
-    } else if (Current.is(tok::exclaim)) {
-      Current.Type = TT_UnaryOperator;
-    } else if (isBinaryOperator(Current)) {
-      Current.Type = TT_BinaryOperator;
-    } else if (Current.is(tok::comment)) {
-      std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
-                                          Lex.getLangOpts()));
-      if (StringRef(Data).startswith("//"))
-        Current.Type = TT_LineComment;
-      else
-        Current.Type = TT_BlockComment;
-    } else if (Current.is(tok::r_paren) &&
-               (Current.Parent->Type == TT_PointerOrReference ||
-                Current.Parent->Type == TT_TemplateCloser) &&
-               (Current.Children.empty() ||
-                (Current.Children[0].isNot(tok::equal) &&
-                 Current.Children[0].isNot(tok::semi) &&
-                 Current.Children[0].isNot(tok::l_brace)))) {
-      // FIXME: We need to get smarter and understand more cases of casts.
-      Current.Type = TT_CastRParen;
-    } else if (Current.is(tok::at) && Current.Children.size()) {
-      switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
-      case tok::objc_interface:
-      case tok::objc_implementation:
-      case tok::objc_protocol:
-        Current.Type = TT_ObjCDecl;
-        break;
-      case tok::objc_property:
-        Current.Type = TT_ObjCProperty;
-        break;
-      default:
-        break;
-      }
-    }
-  }
-
-  if (!Current.Children.empty())
-    determineTokenTypes(Current.Children[0], IsExpression, LookForFunctionName);
-}
-
-void TokenAnnotator::findFunctionName(AnnotatedToken *Current) {
-  AnnotatedToken *Parent = Current->Parent;
-  while (Parent != NULL && Parent->Parent != NULL) {
-    if (Parent->is(tok::identifier) &&
-        (Parent->Parent->is(tok::identifier) || Parent->Parent->Type ==
-         TT_PointerOrReference || Parent->Parent->Type == TT_TemplateCloser)) {
-      Parent->Type = TT_StartOfName;
-      break;
-    }
-    Parent = Parent->Parent;
-  }
-}
-
-TokenType TokenAnnotator::determineStarAmpUsage(const AnnotatedToken &Tok,
-                                                bool IsExpression) {
-  const AnnotatedToken *PrevToken = getPreviousToken(Tok);
-  if (PrevToken == NULL)
-    return TT_UnaryOperator;
-
-  const AnnotatedToken *NextToken = getNextToken(Tok);
-  if (NextToken == NULL)
-    return TT_Unknown;
-
-  if (NextToken->is(tok::l_square) && NextToken->Type != TT_ObjCMethodExpr)
-    return TT_PointerOrReference;
-
-  if (PrevToken->is(tok::l_paren) || PrevToken->is(tok::l_square) ||
-      PrevToken->is(tok::l_brace) || PrevToken->is(tok::comma) ||
-      PrevToken->is(tok::kw_return) || PrevToken->is(tok::colon) ||
-      PrevToken->Type == TT_BinaryOperator ||
-      PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
-    return TT_UnaryOperator;
-
-  if (PrevToken->FormatTok.Tok.isLiteral() || PrevToken->is(tok::r_paren) ||
-      PrevToken->is(tok::r_square) || NextToken->FormatTok.Tok.isLiteral() ||
-      NextToken->is(tok::plus) || NextToken->is(tok::minus) ||
-      NextToken->is(tok::plusplus) || NextToken->is(tok::minusminus) ||
-      NextToken->is(tok::tilde) || NextToken->is(tok::exclaim) ||
-      NextToken->is(tok::l_paren) || NextToken->is(tok::l_square) ||
-      NextToken->is(tok::kw_alignof) || NextToken->is(tok::kw_sizeof))
-    return TT_BinaryOperator;
-
-  if (NextToken->is(tok::comma) || NextToken->is(tok::r_paren) ||
-      NextToken->is(tok::greater))
-    return TT_PointerOrReference;
-
-  // It is very unlikely that we are going to find a pointer or reference type
-  // definition on the RHS of an assignment.
-  if (IsExpression)
-    return TT_BinaryOperator;
-
-  return TT_PointerOrReference;
-}
-
-TokenType
-TokenAnnotator::determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
-  const AnnotatedToken *PrevToken = getPreviousToken(Tok);
-  if (PrevToken == NULL)
-    return TT_UnaryOperator;
-
-  // Use heuristics to recognize unary operators.
-  if (PrevToken->is(tok::equal) || PrevToken->is(tok::l_paren) ||
-      PrevToken->is(tok::comma) || PrevToken->is(tok::l_square) ||
-      PrevToken->is(tok::question) || PrevToken->is(tok::colon) ||
-      PrevToken->is(tok::kw_return) || PrevToken->is(tok::kw_case) ||
-      PrevToken->is(tok::at) || PrevToken->is(tok::l_brace))
-    return TT_UnaryOperator;
-
-  // There can't be to consecutive binary operators.
-  if (PrevToken->Type == TT_BinaryOperator)
-    return TT_UnaryOperator;
-
-  // Fall back to marking the token as binary operator.
-  return TT_BinaryOperator;
-}
-
-TokenType TokenAnnotator::determineIncrementUsage(const AnnotatedToken &Tok) {
-  const AnnotatedToken *PrevToken = getPreviousToken(Tok);
-  if (PrevToken == NULL)
-    return TT_UnaryOperator;
-  if (PrevToken->is(tok::r_paren) || PrevToken->is(tok::r_square) ||
-      PrevToken->is(tok::identifier))
-    return TT_TrailingUnaryOperator;
-
-  return TT_UnaryOperator;
-}
-
 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedToken &Left,
                                           const AnnotatedToken &Right) {
   if (Right.is(tok::hashhash))

Modified: cfe/trunk/lib/Format/TokenAnnotator.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.h?rev=174307&r1=174306&r2=174307&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.h (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.h Mon Feb  4 01:21:18 2013
@@ -69,7 +69,7 @@ public:
       : FormatTok(FormatTok), Type(TT_Unknown), SpaceRequiredBefore(false),
         CanBreakBefore(false), MustBreakBefore(false),
         ClosesTemplateDeclaration(false), MatchingParen(NULL),
-        ParameterCount(1), Parent(NULL) {
+        ParameterCount(1), BindingStrength(0), SplitPenalty(0), Parent(NULL) {
   }
 
   bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
@@ -101,6 +101,11 @@ public:
   /// \brief The total length of the line up to and including this token.
   unsigned TotalLength;
 
+  // FIXME: Come up with a 'cleaner' concept.
+  /// \brief The binding strength of a token. This is a combined value of
+  /// operator precedence, parenthesis nesting, etc.
+  unsigned BindingStrength;
+
   /// \brief Penalty for inserting a line break before this token.
   unsigned SplitPenalty;
 
@@ -166,48 +171,12 @@ public:
   }
 
   void annotate();
-  void calculateExtraInformation(AnnotatedToken &Current);
+  void calculateFormattingInformation(AnnotatedToken &Current);
 
 private:
   /// \brief Calculate the penalty for splitting before \c Tok.
   unsigned splitPenalty(const AnnotatedToken &Tok);
 
-  void determineTokenTypes(AnnotatedToken &Current, bool IsExpression,
-                           bool LookForFunctionName);
-
-  /// \brief Starting from \p Current, this searches backwards for an
-  /// identifier which could be the start of a function name and marks it.
-  void findFunctionName(AnnotatedToken *Current);
-
-  /// \brief Returns the previous token ignoring comments.
-  const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
-    const AnnotatedToken *PrevToken = Tok.Parent;
-    while (PrevToken != NULL && PrevToken->is(tok::comment))
-      PrevToken = PrevToken->Parent;
-    return PrevToken;
-  }
-
-  /// \brief Returns the next token ignoring comments.
-  const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
-    if (Tok.Children.empty())
-      return NULL;
-    const AnnotatedToken *NextToken = &Tok.Children[0];
-    while (NextToken->is(tok::comment)) {
-      if (NextToken->Children.empty())
-        return NULL;
-      NextToken = &NextToken->Children[0];
-    }
-    return NextToken;
-  }
-
-  /// \brief Return the type of the given token assuming it is * or &.
-  TokenType determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression);
-
-  TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok);
-
-  /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
-  TokenType determineIncrementUsage(const AnnotatedToken &Tok);
-
   bool spaceRequiredBetween(const AnnotatedToken &Left,
                             const AnnotatedToken &Right);
 
@@ -221,7 +190,6 @@ private:
   AnnotatedLine &Line;
 };
 
-
 } // end namespace format
 } // end namespace clang
 

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=174307&r1=174306&r2=174307&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Mon Feb  4 01:21:18 2013
@@ -1355,6 +1355,10 @@ TEST_F(FormatTest, WrapsTemplateDeclarat
   verifyFormat(
       "aaaaaaaaaaaaaaaaaaaaaaaa<aaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaa>(\n"
       "    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa);");
+
+  verifyFormat(
+      "a<aaaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaa>(a(aaaaaaaaaaaaaaaaaa,\n"
+      "                                                aaaaaaaaaaaaaaaa));");
 }
 
 TEST_F(FormatTest, WrapsAtNestedNameSpecifiers) {