r174714 - Implement a tiny expression parser to improve formatting decisions.

Daniel Jasper djasper at google.com
Fri Feb 8 07:28:42 PST 2013


Author: djasper
Date: Fri Feb  8 09:28:42 2013
New Revision: 174714

URL: http://llvm.org/viewvc/llvm-project?rev=174714&view=rev
Log:
Implement a tiny expression parser to improve formatting decisions.

With this patch, the formatter introduces 'fake' parenthesis according
to the operator precedence of binary operators.

Before:
return aaaa & AAAAAAAAAAAAAAAAAAAAAAAAAAAAA || bbbb &
       BBBBBBBBBBBBBBBBBBBBBBBBBBBBB || cccc & CCCCCCCCCCCCCCCCCCCCCCCCCC ||
       dddd & DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD;
f(aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaaa &&
  aaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa);

After:
return aaaa & AAAAAAAAAAAAAAAAAAAAAAAAAAAAA ||
       bbbb & BBBBBBBBBBBBBBBBBBBBBBBBBBBBB ||
       cccc & CCCCCCCCCCCCCCCCCCCCCCCCCC ||
       dddd & DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD;
f(aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa,
  aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa,
  aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa);

Future improvements:
- Get rid of some of the hacky ways to nicely format certain constructs.
- Merge this parser and the AnnotatingParser as we now have several parsers
  that analyze (), [], etc.

Modified:
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/lib/Format/TokenAnnotator.cpp
    cfe/trunk/lib/Format/TokenAnnotator.h
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=174714&r1=174713&r2=174714&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Fri Feb  8 09:28:42 2013
@@ -205,7 +205,7 @@ private:
     while (I != E) {
       unsigned Spaces = I->Spaces + Column - I->MinColumn;
       storeReplacement(I->Tok, std::string(I->NewLines, '\n') +
-                       std::string(Spaces, ' '));
+                               std::string(Spaces, ' '));
       ++I;
     }
   }
@@ -251,6 +251,7 @@ public:
                    /*HasMultiParameterLine=*/ false));
     State.VariablePos = 0;
     State.LineContainsContinuedForLoopSection = false;
+    State.ParenLevel = 0;
 
     DEBUG({
       DebugTokenState(*State.NextToken);
@@ -287,8 +288,8 @@ private:
   struct ParenState {
     ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
                bool HasMultiParameterLine)
-        : Indent(Indent), LastSpace(LastSpace), AssignmentColumn(0),
-          FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0),
+        : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0),
+          BreakBeforeClosingBrace(false), QuestionColumn(0),
           AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
           HasMultiParameterLine(HasMultiParameterLine), ColonPos(0) {
     }
@@ -304,9 +305,6 @@ private:
     ///                             OtherParameter));
     unsigned LastSpace;
 
-    /// \brief This is the column of the first token after an assignment.
-    unsigned AssignmentColumn;
-
     /// \brief The position the first "<<" operator encountered on each level.
     ///
     /// Used to align "<<" operators. 0 if no such operator has been encountered
@@ -342,8 +340,6 @@ private:
         return Indent < Other.Indent;
       if (LastSpace != Other.LastSpace)
         return LastSpace < Other.LastSpace;
-      if (AssignmentColumn != Other.AssignmentColumn)
-        return AssignmentColumn < Other.AssignmentColumn;
       if (FirstLessLess != Other.FirstLessLess)
         return FirstLessLess < Other.FirstLessLess;
       if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
@@ -380,6 +376,9 @@ private:
     /// \brief \c true if this line contains a continued for-loop section.
     bool LineContainsContinuedForLoopSection;
 
+    /// \brief The level of nesting inside (), [], <> and {}.
+    unsigned ParenLevel;
+
     /// \brief A stack keeping track of properties applying to parenthesis
     /// levels.
     std::vector<ParenState> Stack;
@@ -395,6 +394,8 @@ private:
       if (Other.LineContainsContinuedForLoopSection !=
           LineContainsContinuedForLoopSection)
         return LineContainsContinuedForLoopSection;
+      if (Other.ParenLevel != ParenLevel)
+        return Other.ParenLevel < ParenLevel;
       return Other.Stack < Stack;
     }
   };
@@ -411,7 +412,6 @@ private:
     const AnnotatedToken &Current = *State.NextToken;
     const AnnotatedToken &Previous = *State.NextToken->Parent;
     assert(State.Stack.size());
-    unsigned ParenLevel = State.Stack.size() - 1;
 
     if (Current.Type == TT_ImplicitStringLiteral) {
       State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
@@ -431,9 +431,9 @@ private:
                  Previous.is(tok::string_literal)) {
         State.Column = State.Column - Previous.FormatTok.TokenLength;
       } else if (Current.is(tok::lessless) &&
-                 State.Stack[ParenLevel].FirstLessLess != 0) {
-        State.Column = State.Stack[ParenLevel].FirstLessLess;
-      } else if (ParenLevel != 0 &&
+                 State.Stack.back().FirstLessLess != 0) {
+        State.Column = State.Stack.back().FirstLessLess;
+      } else if (State.ParenLevel != 0 &&
                  (Previous.is(tok::equal) || Previous.is(tok::coloncolon) ||
                   Current.is(tok::period) || Current.is(tok::arrow) ||
                   Current.is(tok::question))) {
@@ -445,15 +445,12 @@ private:
       } else if (Current.Type == TT_ConditionalExpr) {
         State.Column = State.Stack.back().QuestionColumn;
       } else if (Previous.is(tok::comma) && State.VariablePos != 0 &&
-                 ((RootToken.is(tok::kw_for) && ParenLevel == 1) ||
-                  ParenLevel == 0)) {
+                 ((RootToken.is(tok::kw_for) && State.ParenLevel == 1) ||
+                  State.ParenLevel == 0)) {
         State.Column = State.VariablePos;
       } else if (State.NextToken->Parent->ClosesTemplateDeclaration ||
                  Current.Type == TT_StartOfName) {
-        State.Column = State.Stack[ParenLevel].Indent - 4;
-      } else if (Previous.Type == TT_BinaryOperator &&
-                 State.Stack.back().AssignmentColumn != 0) {
-        State.Column = State.Stack.back().AssignmentColumn;
+        State.Column = State.Stack.back().Indent - 4;
       } else if (Current.Type == TT_ObjCSelectorName) {
         if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) {
           State.Column =
@@ -466,7 +463,7 @@ private:
       } else if (Previous.Type == TT_ObjCMethodExpr) {
         State.Column = State.Stack.back().Indent + 4;
       } else {
-        State.Column = State.Stack[ParenLevel].Indent;
+        State.Column = State.Stack.back().Indent;
       }
 
       if (Previous.is(tok::comma) && !State.Stack.back().AvoidBinPacking)
@@ -484,12 +481,12 @@ private:
                                           WhitespaceStartColumn, Style);
       }
 
-      State.Stack[ParenLevel].LastSpace = State.Column;
+      State.Stack.back().LastSpace = State.Column;
       if (Current.is(tok::colon) && Current.Type != TT_ConditionalExpr)
-        State.Stack[ParenLevel].Indent += 2;
+        State.Stack.back().Indent += 2;
     } else {
       if (Current.is(tok::equal) &&
-          (RootToken.is(tok::kw_for) || ParenLevel == 0))
+          (RootToken.is(tok::kw_for) || State.ParenLevel == 0))
         State.VariablePos = State.Column - Previous.FormatTok.TokenLength;
 
       unsigned Spaces = State.NextToken->SpaceRequiredBefore ? 1 : 0;
@@ -510,26 +507,19 @@ private:
               State.Column + Spaces + Current.FormatTok.TokenLength;
       }
 
-      // FIXME: Do we need to do this for assignments nested in other
-      // expressions?
-      if (RootToken.isNot(tok::kw_for) && ParenLevel == 0 &&
-          !isTrailingComment(Current) &&
-          (getPrecedence(Previous) == prec::Assignment ||
-           Previous.is(tok::kw_return)))
-        State.Stack.back().AssignmentColumn = State.Column + Spaces;
       if (Current.Type != TT_LineComment &&
           (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) ||
            State.NextToken->Parent->Type == TT_TemplateOpener))
-        State.Stack[ParenLevel].Indent = State.Column + Spaces;
+        State.Stack.back().Indent = State.Column + Spaces;
       if (Previous.is(tok::comma) && !isTrailingComment(Current))
-        State.Stack[ParenLevel].HasMultiParameterLine = true;
+        State.Stack.back().HasMultiParameterLine = true;
 
       State.Column += Spaces;
       if (Current.is(tok::l_paren) && Previous.is(tok::kw_if))
         // Treat the condition inside an if as if it was a second function
         // parameter, i.e. let nested calls have an indent of 4.
         State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
-      else if (Previous.is(tok::comma) && ParenLevel != 0)
+      else if (Previous.is(tok::comma) && State.ParenLevel != 0)
         // Top-level spaces are exempt as that mostly leads to better results.
         State.Stack.back().LastSpace = State.Column;
       else if ((Previous.Type == TT_BinaryOperator ||
@@ -551,7 +541,7 @@ private:
       State.Stack.back().BreakBeforeClosingBrace = true;
 
     if (State.Stack.back().AvoidBinPacking && Newline &&
-        (Line.First.isNot(tok::kw_for) || ParenLevel != 1)) {
+        (Line.First.isNot(tok::kw_for) || State.ParenLevel != 1)) {
       // If we are breaking after '(', '{', '<', this is not bin packing unless
       // AllowAllParametersOfDeclarationOnNextLine is false.
       if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace) &&
@@ -587,6 +577,13 @@ private:
         State.Stack.back().BreakBeforeParameter = true;
     }
 
+    // Insert scopes created by fake parenthesis.
+    for (unsigned i = 0, e = Current.FakeLParens; i != e; ++i) {
+      ParenState NewParenState = State.Stack.back();
+      NewParenState.Indent = std::max(State.Column, State.Stack.back().Indent);
+      State.Stack.push_back(NewParenState);
+    }
+
     // If we encounter an opening (, [, { or <, we add a level to our stacks to
     // prepare for the following tokens.
     if (Current.is(tok::l_paren) || Current.is(tok::l_square) ||
@@ -604,6 +601,7 @@ private:
       State.Stack.push_back(
           ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking,
                      State.Stack.back().HasMultiParameterLine));
+      ++State.ParenLevel;
     }
 
     // If this '[' opens an ObjC call, determine whether all parameters fit into
@@ -620,6 +618,12 @@ private:
         (Current.is(tok::r_brace) && State.NextToken != &RootToken) ||
         State.NextToken->Type == TT_TemplateCloser) {
       State.Stack.pop_back();
+      --State.ParenLevel;
+    }
+
+    // Remove scopes created by fake parenthesis.
+    for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) {
+      State.Stack.pop_back();
     }
 
     if (State.NextToken->Children.empty())
@@ -760,7 +764,7 @@ private:
       return true;
     if ((State.NextToken->Type == TT_CtorInitializerColon ||
          (State.NextToken->Parent->ClosesTemplateDeclaration &&
-          State.Stack.size() == 1)))
+          State.ParenLevel == 0)))
       return true;
     return false;
   }
@@ -885,8 +889,9 @@ public:
             ++CountBoundToType;
         }
 
-        if (Tok->Type == TT_TemplateCloser && Tok->Parent->Type ==
-            TT_TemplateCloser && Tok->FormatTok.WhiteSpaceLength == 0)
+        if (Tok->Type == TT_TemplateCloser &&
+            Tok->Parent->Type == TT_TemplateCloser &&
+            Tok->FormatTok.WhiteSpaceLength == 0)
           HasCpp03IncompatibleFormat = true;
         Tok = &Tok->Children[0];
       }

Modified: cfe/trunk/lib/Format/TokenAnnotator.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=174714&r1=174713&r2=174714&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.cpp (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.cpp Fri Feb  8 09:28:42 2013
@@ -52,6 +52,12 @@ static const AnnotatedToken *getPrevious
   return getPreviousToken(const_cast<AnnotatedToken &>(Tok));
 }
 
+static bool isTrailingComment(AnnotatedToken *Tok) {
+  return Tok != NULL && Tok->is(tok::comment) &&
+         (Tok->Children.empty() ||
+          Tok->Children[0].FormatTok.NewlinesBefore > 0);
+}
+
 // Returns the next token ignoring comments.
 static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
   if (Tok.Children.empty())
@@ -681,12 +687,91 @@ private:
   bool KeywordVirtualFound;
 };
 
+/// \brief Parses binary expressions by inserting fake parenthesis based on
+/// operator precedence.
+class ExpressionParser {
+public:
+  ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {}
+
+  /// \brief Parse expressions with the given operatore precedence.
+  void parse(unsigned Precedence = prec::Unknown) {
+    if (Precedence > prec::PointerToMember || Current == NULL)
+      return;
+
+    // Skip over "return" until we can properly parse it.
+    if (Current->is(tok::kw_return))
+      next();
+
+    // Eagerly consume trailing comments.
+    while (isTrailingComment(Current)) {
+      next();
+    }
+
+    AnnotatedToken *Start = Current;
+    bool OperatorFound = false;
+
+    while (Current != NULL) {
+      // Consume operators with higher precedence.
+      parse(Precedence + 1);
+
+      // At the end of the line or when an operator with higher precedence is
+      // found, insert fake parenthesis and return.
+      if (Current == NULL || Current->is(tok::semi) || closesScope(*Current) ||
+          ((Current->Type == TT_BinaryOperator || Current->is(tok::comma)) &&
+           getPrecedence(*Current) < Precedence)) {
+        if (OperatorFound) {
+          ++Start->FakeLParens;
+          if (Current != NULL)
+            ++Current->FakeRParens;
+        }
+        return;
+      }
+
+      // Consume scopes: (), [], <> and {}
+      if (opensScope(*Current)) {
+        while (Current != NULL && !closesScope(*Current)) {
+          next();
+          parse();
+        }
+        next();
+      } else {
+        // Operator found.
+        if (getPrecedence(*Current) == Precedence)
+          OperatorFound = true;
+
+        next();
+      }
+    }
+  }
+
+private:
+  void next() {
+    if (Current != NULL)
+      Current = Current->Children.empty() ? NULL : &Current->Children[0];
+  }
+
+  bool closesScope(const AnnotatedToken &Tok) {
+    return Current->is(tok::r_paren) || Current->Type == TT_TemplateCloser ||
+           Current->is(tok::r_brace) || Current->is(tok::r_square);
+  }
+
+  bool opensScope(const AnnotatedToken &Tok) {
+    return Current->is(tok::l_paren) || Current->Type == TT_TemplateOpener ||
+           Current->is(tok::l_brace) || Current->is(tok::l_square);
+  }
+
+  AnnotatedToken *Current;
+};
+
 void TokenAnnotator::annotate(AnnotatedLine &Line) {
   AnnotatingParser Parser(SourceMgr, Lex, Line);
   Line.Type = Parser.parseLine();
   if (Line.Type == LT_Invalid)
     return;
 
+  ExpressionParser ExprParser(Line);
+  ExprParser.parse();
+
   if (Line.First.Type == TT_ObjCMethodSpecifier)
     Line.Type = LT_ObjCMethodDecl;
   else if (Line.First.Type == TT_ObjCDecl)
@@ -712,8 +797,7 @@ void TokenAnnotator::calculateFormatting
       Current->MustBreakBefore = true;
     } else if (Current->Type == TT_LineComment) {
       Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
-    } else if ((Current->Parent->is(tok::comment) &&
-                Current->FormatTok.NewlinesBefore > 0) ||
+    } else if (isTrailingComment(Current->Parent) ||
                (Current->is(tok::string_literal) &&
                 Current->Parent->is(tok::string_literal))) {
       Current->MustBreakBefore = true;
@@ -839,8 +923,7 @@ bool TokenAnnotator::spaceRequiredBetwee
            (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
             !Style.PointerBindsToType);
   if (Left.is(tok::amp) || Left.is(tok::star))
-    return Right.FormatTok.Tok.isLiteral() ||
-           Style.PointerBindsToType;
+    return Right.FormatTok.Tok.isLiteral() || Style.PointerBindsToType;
   if (Right.is(tok::star) && Left.is(tok::l_paren))
     return false;
   if (Left.is(tok::l_square) || Right.is(tok::r_square))
@@ -911,8 +994,9 @@ bool TokenAnnotator::spaceRequiredBefore
            (Tok.Parent->isNot(tok::colon) ||
             Tok.Parent->Type != TT_ObjCMethodExpr);
   if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
-    return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
-           TT_TemplateCloser && Style.Standard != FormatStyle::LS_Cpp11;
+    return Tok.Type == TT_TemplateCloser &&
+           Tok.Parent->Type == TT_TemplateCloser &&
+           Style.Standard != FormatStyle::LS_Cpp11;
   }
   if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
     return true;

Modified: cfe/trunk/lib/Format/TokenAnnotator.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.h?rev=174714&r1=174713&r2=174714&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.h (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.h Fri Feb  8 09:28:42 2013
@@ -71,7 +71,8 @@ public:
         CanBreakBefore(false), MustBreakBefore(false),
         ClosesTemplateDeclaration(false), MatchingParen(NULL),
         ParameterCount(1), BindingStrength(0), SplitPenalty(0),
-        LongestObjCSelectorName(0), Parent(NULL) {
+        LongestObjCSelectorName(0), Parent(NULL), FakeLParens(0),
+        FakeRParens(0) {
   }
 
   bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
@@ -118,6 +119,11 @@ public:
   std::vector<AnnotatedToken> Children;
   AnnotatedToken *Parent;
 
+  /// \brief Insert this many fake ( before this token for correct indentation.
+  unsigned FakeLParens;
+  /// \brief Insert this many fake ) before this token for correct indentation.
+  unsigned FakeRParens;
+
   const AnnotatedToken *getPreviousNoneComment() const {
     AnnotatedToken *Tok = Parent;
     while (Tok != NULL && Tok->is(tok::comment))

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=174714&r1=174713&r2=174714&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Fri Feb  8 09:28:42 2013
@@ -1192,6 +1192,13 @@ TEST_F(FormatTest, BreaksAccordingToOper
   verifyFormat(
       "if ((aaaaaaaaaaaaaaaaaaaaaaaaa || bbbbbbbbbbbbbbbbbbbbbbbbb) &&\n"
       "    ccccccccccccccccccccccccc) {\n}");
+  verifyFormat("return aaaa & AAAAAAAAAAAAAAAAAAAAAAAAAAAAA ||\n"
+               "       bbbb & BBBBBBBBBBBBBBBBBBBBBBBBBBBBB ||\n"
+               "       cccc & CCCCCCCCCCCCCCCCCCCCCCCCCC ||\n"
+               "       dddd & DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD;");
+  verifyFormat("if ((aaaaaaaaaa != aaaaaaaaaaaaaaa ||\n"
+               "     aaaaaaaaaaaaaaaaaaaaaaaa() >= aaaaaaaaaaaaaaaaaaaa) &&\n"
+               "    aaaaaaaaaaaaaaa != aa) {\n}");
 }
 
 TEST_F(FormatTest, BreaksAfterAssignments) {
@@ -1275,7 +1282,7 @@ TEST_F(FormatTest, DeclarationsOfMultipl
   verifyFormat("bool aaaaaaaaaaaaaaaaaaaaaaaaa =\n"
                "    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(aaaaaaaaaaaaaaaa),\n"
                "     bbbbbbbbbbbbbbbbbbbbbbbbb =\n"
-               "    bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb(bbbbbbbbbbbbbbbb);");
+               "     bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb(bbbbbbbbbbbbbbbb);");
 
   // FIXME: This is bad as we hide "d".
   verifyFormat(
@@ -1391,6 +1398,9 @@ TEST_F(FormatTest, WrapsTemplateDeclarat
                "virtual void loooooooooooongFunction(int Param1, int Param2);");
   verifyFormat(
       "template <typename T>\n"
+      "using comment_to_xml_conversion = comment_to_xml_conversion<T, int>;");
+  verifyFormat(
+      "template <typename T>\n"
       "void f(int Paaaaaaaaaaaaaaaaaaaaaaaaaaaaaaram1,\n"
       "       int Paaaaaaaaaaaaaaaaaaaaaaaaaaaaaaram2);");
   verifyFormat(





More information about the cfe-commits mailing list