r174714 - Implement a tiny expression parser to improve formatting decisions.
David Blaikie
dblaikie at gmail.com
Fri Feb 8 10:12:50 PST 2013
On Fri, Feb 8, 2013 at 7:28 AM, Daniel Jasper <djasper at google.com> wrote:
> Author: djasper
> Date: Fri Feb 8 09:28:42 2013
> New Revision: 174714
>
> URL: http://llvm.org/viewvc/llvm-project?rev=174714&view=rev
> Log:
> Implement a tiny expression parser to improve formatting decisions.
>
> With this patch, the formatter introduces 'fake' parenthesis according
> to the operator precedence of binary operators.
>
> Before:
> return aaaa & AAAAAAAAAAAAAAAAAAAAAAAAAAAAA || bbbb &
> BBBBBBBBBBBBBBBBBBBBBBBBBBBBB || cccc & CCCCCCCCCCCCCCCCCCCCCCCCCC ||
> dddd & DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD;
> f(aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaaa &&
> aaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa);
>
> After:
> return aaaa & AAAAAAAAAAAAAAAAAAAAAAAAAAAAA ||
> bbbb & BBBBBBBBBBBBBBBBBBBBBBBBBBBBB ||
> cccc & CCCCCCCCCCCCCCCCCCCCCCCCCC ||
> dddd & DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD;
> f(aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa,
> aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa,
> aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa);
Precedence based line splitting - awesome!
> Future improvements:
> - Get rid of some of the hacky ways to nicely format certain constructs.
> - Merge this parser and the AnnotatingParser as we now have several parsers
> that analyze (), [], etc.
>
> Modified:
> cfe/trunk/lib/Format/Format.cpp
> cfe/trunk/lib/Format/TokenAnnotator.cpp
> cfe/trunk/lib/Format/TokenAnnotator.h
> cfe/trunk/unittests/Format/FormatTest.cpp
>
> Modified: cfe/trunk/lib/Format/Format.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=174714&r1=174713&r2=174714&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Format/Format.cpp (original)
> +++ cfe/trunk/lib/Format/Format.cpp Fri Feb 8 09:28:42 2013
> @@ -205,7 +205,7 @@ private:
> while (I != E) {
> unsigned Spaces = I->Spaces + Column - I->MinColumn;
> storeReplacement(I->Tok, std::string(I->NewLines, '\n') +
> - std::string(Spaces, ' '));
> + std::string(Spaces, ' '));
> ++I;
> }
> }
> @@ -251,6 +251,7 @@ public:
> /*HasMultiParameterLine=*/ false));
> State.VariablePos = 0;
> State.LineContainsContinuedForLoopSection = false;
> + State.ParenLevel = 0;
>
> DEBUG({
> DebugTokenState(*State.NextToken);
> @@ -287,8 +288,8 @@ private:
> struct ParenState {
> ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
> bool HasMultiParameterLine)
> - : Indent(Indent), LastSpace(LastSpace), AssignmentColumn(0),
> - FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0),
> + : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0),
> + BreakBeforeClosingBrace(false), QuestionColumn(0),
> AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
> HasMultiParameterLine(HasMultiParameterLine), ColonPos(0) {
> }
> @@ -304,9 +305,6 @@ private:
> /// OtherParameter));
> unsigned LastSpace;
>
> - /// \brief This is the column of the first token after an assignment.
> - unsigned AssignmentColumn;
> -
> /// \brief The position the first "<<" operator encountered on each level.
> ///
> /// Used to align "<<" operators. 0 if no such operator has been encountered
> @@ -342,8 +340,6 @@ private:
> return Indent < Other.Indent;
> if (LastSpace != Other.LastSpace)
> return LastSpace < Other.LastSpace;
> - if (AssignmentColumn != Other.AssignmentColumn)
> - return AssignmentColumn < Other.AssignmentColumn;
> if (FirstLessLess != Other.FirstLessLess)
> return FirstLessLess < Other.FirstLessLess;
> if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
> @@ -380,6 +376,9 @@ private:
> /// \brief \c true if this line contains a continued for-loop section.
> bool LineContainsContinuedForLoopSection;
>
> + /// \brief The level of nesting inside (), [], <> and {}.
> + unsigned ParenLevel;
> +
> /// \brief A stack keeping track of properties applying to parenthesis
> /// levels.
> std::vector<ParenState> Stack;
> @@ -395,6 +394,8 @@ private:
> if (Other.LineContainsContinuedForLoopSection !=
> LineContainsContinuedForLoopSection)
> return LineContainsContinuedForLoopSection;
> + if (Other.ParenLevel != ParenLevel)
> + return Other.ParenLevel < ParenLevel;
> return Other.Stack < Stack;
> }
> };
> @@ -411,7 +412,6 @@ private:
> const AnnotatedToken &Current = *State.NextToken;
> const AnnotatedToken &Previous = *State.NextToken->Parent;
> assert(State.Stack.size());
> - unsigned ParenLevel = State.Stack.size() - 1;
>
> if (Current.Type == TT_ImplicitStringLiteral) {
> State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
> @@ -431,9 +431,9 @@ private:
> Previous.is(tok::string_literal)) {
> State.Column = State.Column - Previous.FormatTok.TokenLength;
> } else if (Current.is(tok::lessless) &&
> - State.Stack[ParenLevel].FirstLessLess != 0) {
> - State.Column = State.Stack[ParenLevel].FirstLessLess;
> - } else if (ParenLevel != 0 &&
> + State.Stack.back().FirstLessLess != 0) {
> + State.Column = State.Stack.back().FirstLessLess;
> + } else if (State.ParenLevel != 0 &&
> (Previous.is(tok::equal) || Previous.is(tok::coloncolon) ||
> Current.is(tok::period) || Current.is(tok::arrow) ||
> Current.is(tok::question))) {
> @@ -445,15 +445,12 @@ private:
> } else if (Current.Type == TT_ConditionalExpr) {
> State.Column = State.Stack.back().QuestionColumn;
> } else if (Previous.is(tok::comma) && State.VariablePos != 0 &&
> - ((RootToken.is(tok::kw_for) && ParenLevel == 1) ||
> - ParenLevel == 0)) {
> + ((RootToken.is(tok::kw_for) && State.ParenLevel == 1) ||
> + State.ParenLevel == 0)) {
> State.Column = State.VariablePos;
> } else if (State.NextToken->Parent->ClosesTemplateDeclaration ||
> Current.Type == TT_StartOfName) {
> - State.Column = State.Stack[ParenLevel].Indent - 4;
> - } else if (Previous.Type == TT_BinaryOperator &&
> - State.Stack.back().AssignmentColumn != 0) {
> - State.Column = State.Stack.back().AssignmentColumn;
> + State.Column = State.Stack.back().Indent - 4;
> } else if (Current.Type == TT_ObjCSelectorName) {
> if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) {
> State.Column =
> @@ -466,7 +463,7 @@ private:
> } else if (Previous.Type == TT_ObjCMethodExpr) {
> State.Column = State.Stack.back().Indent + 4;
> } else {
> - State.Column = State.Stack[ParenLevel].Indent;
> + State.Column = State.Stack.back().Indent;
> }
>
> if (Previous.is(tok::comma) && !State.Stack.back().AvoidBinPacking)
> @@ -484,12 +481,12 @@ private:
> WhitespaceStartColumn, Style);
> }
>
> - State.Stack[ParenLevel].LastSpace = State.Column;
> + State.Stack.back().LastSpace = State.Column;
> if (Current.is(tok::colon) && Current.Type != TT_ConditionalExpr)
> - State.Stack[ParenLevel].Indent += 2;
> + State.Stack.back().Indent += 2;
> } else {
> if (Current.is(tok::equal) &&
> - (RootToken.is(tok::kw_for) || ParenLevel == 0))
> + (RootToken.is(tok::kw_for) || State.ParenLevel == 0))
> State.VariablePos = State.Column - Previous.FormatTok.TokenLength;
>
> unsigned Spaces = State.NextToken->SpaceRequiredBefore ? 1 : 0;
> @@ -510,26 +507,19 @@ private:
> State.Column + Spaces + Current.FormatTok.TokenLength;
> }
>
> - // FIXME: Do we need to do this for assignments nested in other
> - // expressions?
> - if (RootToken.isNot(tok::kw_for) && ParenLevel == 0 &&
> - !isTrailingComment(Current) &&
> - (getPrecedence(Previous) == prec::Assignment ||
> - Previous.is(tok::kw_return)))
> - State.Stack.back().AssignmentColumn = State.Column + Spaces;
> if (Current.Type != TT_LineComment &&
> (Previous.is(tok::l_paren) || Previous.is(tok::l_brace) ||
> State.NextToken->Parent->Type == TT_TemplateOpener))
> - State.Stack[ParenLevel].Indent = State.Column + Spaces;
> + State.Stack.back().Indent = State.Column + Spaces;
> if (Previous.is(tok::comma) && !isTrailingComment(Current))
> - State.Stack[ParenLevel].HasMultiParameterLine = true;
> + State.Stack.back().HasMultiParameterLine = true;
>
> State.Column += Spaces;
> if (Current.is(tok::l_paren) && Previous.is(tok::kw_if))
> // Treat the condition inside an if as if it was a second function
> // parameter, i.e. let nested calls have an indent of 4.
> State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
> - else if (Previous.is(tok::comma) && ParenLevel != 0)
> + else if (Previous.is(tok::comma) && State.ParenLevel != 0)
> // Top-level spaces are exempt as that mostly leads to better results.
> State.Stack.back().LastSpace = State.Column;
> else if ((Previous.Type == TT_BinaryOperator ||
> @@ -551,7 +541,7 @@ private:
> State.Stack.back().BreakBeforeClosingBrace = true;
>
> if (State.Stack.back().AvoidBinPacking && Newline &&
> - (Line.First.isNot(tok::kw_for) || ParenLevel != 1)) {
> + (Line.First.isNot(tok::kw_for) || State.ParenLevel != 1)) {
> // If we are breaking after '(', '{', '<', this is not bin packing unless
> // AllowAllParametersOfDeclarationOnNextLine is false.
> if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace) &&
> @@ -587,6 +577,13 @@ private:
> State.Stack.back().BreakBeforeParameter = true;
> }
>
> + // Insert scopes created by fake parenthesis.
> + for (unsigned i = 0, e = Current.FakeLParens; i != e; ++i) {
> + ParenState NewParenState = State.Stack.back();
> + NewParenState.Indent = std::max(State.Column, State.Stack.back().Indent);
> + State.Stack.push_back(NewParenState);
> + }
> +
> // If we encounter an opening (, [, { or <, we add a level to our stacks to
> // prepare for the following tokens.
> if (Current.is(tok::l_paren) || Current.is(tok::l_square) ||
> @@ -604,6 +601,7 @@ private:
> State.Stack.push_back(
> ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking,
> State.Stack.back().HasMultiParameterLine));
> + ++State.ParenLevel;
> }
>
> // If this '[' opens an ObjC call, determine whether all parameters fit into
> @@ -620,6 +618,12 @@ private:
> (Current.is(tok::r_brace) && State.NextToken != &RootToken) ||
> State.NextToken->Type == TT_TemplateCloser) {
> State.Stack.pop_back();
> + --State.ParenLevel;
> + }
> +
> + // Remove scopes created by fake parenthesis.
> + for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) {
> + State.Stack.pop_back();
> }
>
> if (State.NextToken->Children.empty())
> @@ -760,7 +764,7 @@ private:
> return true;
> if ((State.NextToken->Type == TT_CtorInitializerColon ||
> (State.NextToken->Parent->ClosesTemplateDeclaration &&
> - State.Stack.size() == 1)))
> + State.ParenLevel == 0)))
> return true;
> return false;
> }
> @@ -885,8 +889,9 @@ public:
> ++CountBoundToType;
> }
>
> - if (Tok->Type == TT_TemplateCloser && Tok->Parent->Type ==
> - TT_TemplateCloser && Tok->FormatTok.WhiteSpaceLength == 0)
> + if (Tok->Type == TT_TemplateCloser &&
> + Tok->Parent->Type == TT_TemplateCloser &&
> + Tok->FormatTok.WhiteSpaceLength == 0)
> HasCpp03IncompatibleFormat = true;
> Tok = &Tok->Children[0];
> }
>
> Modified: cfe/trunk/lib/Format/TokenAnnotator.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=174714&r1=174713&r2=174714&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Format/TokenAnnotator.cpp (original)
> +++ cfe/trunk/lib/Format/TokenAnnotator.cpp Fri Feb 8 09:28:42 2013
> @@ -52,6 +52,12 @@ static const AnnotatedToken *getPrevious
> return getPreviousToken(const_cast<AnnotatedToken &>(Tok));
> }
>
> +static bool isTrailingComment(AnnotatedToken *Tok) {
> + return Tok != NULL && Tok->is(tok::comment) &&
> + (Tok->Children.empty() ||
> + Tok->Children[0].FormatTok.NewlinesBefore > 0);
> +}
> +
> // Returns the next token ignoring comments.
> static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
> if (Tok.Children.empty())
> @@ -681,12 +687,91 @@ private:
> bool KeywordVirtualFound;
> };
>
> +/// \brief Parses binary expressions by inserting fake parenthesis based on
> +/// operator precedence.
> +class ExpressionParser {
> +public:
> + ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {}
> +
> + /// \brief Parse expressions with the given operatore precedence.
> + void parse(unsigned Precedence = prec::Unknown) {
> + if (Precedence > prec::PointerToMember || Current == NULL)
> + return;
> +
> + // Skip over "return" until we can properly parse it.
> + if (Current->is(tok::kw_return))
> + next();
> +
> + // Eagerly consume trailing comments.
> + while (isTrailingComment(Current)) {
> + next();
> + }
> +
> + AnnotatedToken *Start = Current;
> + bool OperatorFound = false;
> +
> + while (Current != NULL) {
> + // Consume operators with higher precedence.
> + parse(Precedence + 1);
> +
> + // At the end of the line or when an operator with higher precedence is
> + // found, insert fake parenthesis and return.
> + if (Current == NULL || Current->is(tok::semi) || closesScope(*Current) ||
> + ((Current->Type == TT_BinaryOperator || Current->is(tok::comma)) &&
> + getPrecedence(*Current) < Precedence)) {
> + if (OperatorFound) {
> + ++Start->FakeLParens;
> + if (Current != NULL)
> + ++Current->FakeRParens;
> + }
> + return;
> + }
> +
> + // Consume scopes: (), [], <> and {}
> + if (opensScope(*Current)) {
> + while (Current != NULL && !closesScope(*Current)) {
> + next();
> + parse();
> + }
> + next();
> + } else {
> + // Operator found.
> + if (getPrecedence(*Current) == Precedence)
> + OperatorFound = true;
> +
> + next();
> + }
> + }
> + }
> +
> +private:
> + void next() {
> + if (Current != NULL)
> + Current = Current->Children.empty() ? NULL : &Current->Children[0];
> + }
> +
> + bool closesScope(const AnnotatedToken &Tok) {
> + return Current->is(tok::r_paren) || Current->Type == TT_TemplateCloser ||
> + Current->is(tok::r_brace) || Current->is(tok::r_square);
> + }
> +
> + bool opensScope(const AnnotatedToken &Tok) {
> + return Current->is(tok::l_paren) || Current->Type == TT_TemplateOpener ||
> + Current->is(tok::l_brace) || Current->is(tok::l_square);
> + }
> +
> + AnnotatedToken *Current;
> +};
> +
> void TokenAnnotator::annotate(AnnotatedLine &Line) {
> AnnotatingParser Parser(SourceMgr, Lex, Line);
> Line.Type = Parser.parseLine();
> if (Line.Type == LT_Invalid)
> return;
>
> + ExpressionParser ExprParser(Line);
> + ExprParser.parse();
> +
> if (Line.First.Type == TT_ObjCMethodSpecifier)
> Line.Type = LT_ObjCMethodDecl;
> else if (Line.First.Type == TT_ObjCDecl)
> @@ -712,8 +797,7 @@ void TokenAnnotator::calculateFormatting
> Current->MustBreakBefore = true;
> } else if (Current->Type == TT_LineComment) {
> Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
> - } else if ((Current->Parent->is(tok::comment) &&
> - Current->FormatTok.NewlinesBefore > 0) ||
> + } else if (isTrailingComment(Current->Parent) ||
> (Current->is(tok::string_literal) &&
> Current->Parent->is(tok::string_literal))) {
> Current->MustBreakBefore = true;
> @@ -839,8 +923,7 @@ bool TokenAnnotator::spaceRequiredBetwee
> (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
> !Style.PointerBindsToType);
> if (Left.is(tok::amp) || Left.is(tok::star))
> - return Right.FormatTok.Tok.isLiteral() ||
> - Style.PointerBindsToType;
> + return Right.FormatTok.Tok.isLiteral() || Style.PointerBindsToType;
> if (Right.is(tok::star) && Left.is(tok::l_paren))
> return false;
> if (Left.is(tok::l_square) || Right.is(tok::r_square))
> @@ -911,8 +994,9 @@ bool TokenAnnotator::spaceRequiredBefore
> (Tok.Parent->isNot(tok::colon) ||
> Tok.Parent->Type != TT_ObjCMethodExpr);
> if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
> - return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
> - TT_TemplateCloser && Style.Standard != FormatStyle::LS_Cpp11;
> + return Tok.Type == TT_TemplateCloser &&
> + Tok.Parent->Type == TT_TemplateCloser &&
> + Style.Standard != FormatStyle::LS_Cpp11;
> }
> if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
> return true;
>
> Modified: cfe/trunk/lib/Format/TokenAnnotator.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.h?rev=174714&r1=174713&r2=174714&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Format/TokenAnnotator.h (original)
> +++ cfe/trunk/lib/Format/TokenAnnotator.h Fri Feb 8 09:28:42 2013
> @@ -71,7 +71,8 @@ public:
> CanBreakBefore(false), MustBreakBefore(false),
> ClosesTemplateDeclaration(false), MatchingParen(NULL),
> ParameterCount(1), BindingStrength(0), SplitPenalty(0),
> - LongestObjCSelectorName(0), Parent(NULL) {
> + LongestObjCSelectorName(0), Parent(NULL), FakeLParens(0),
> + FakeRParens(0) {
> }
>
> bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
> @@ -118,6 +119,11 @@ public:
> std::vector<AnnotatedToken> Children;
> AnnotatedToken *Parent;
>
> + /// \brief Insert this many fake ( before this token for correct indentation.
> + unsigned FakeLParens;
> + /// \brief Insert this many fake ) before this token for correct indentation.
> + unsigned FakeRParens;
> +
> const AnnotatedToken *getPreviousNoneComment() const {
> AnnotatedToken *Tok = Parent;
> while (Tok != NULL && Tok->is(tok::comment))
>
> Modified: cfe/trunk/unittests/Format/FormatTest.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=174714&r1=174713&r2=174714&view=diff
> ==============================================================================
> --- cfe/trunk/unittests/Format/FormatTest.cpp (original)
> +++ cfe/trunk/unittests/Format/FormatTest.cpp Fri Feb 8 09:28:42 2013
> @@ -1192,6 +1192,13 @@ TEST_F(FormatTest, BreaksAccordingToOper
> verifyFormat(
> "if ((aaaaaaaaaaaaaaaaaaaaaaaaa || bbbbbbbbbbbbbbbbbbbbbbbbb) &&\n"
> " ccccccccccccccccccccccccc) {\n}");
> + verifyFormat("return aaaa & AAAAAAAAAAAAAAAAAAAAAAAAAAAAA ||\n"
> + " bbbb & BBBBBBBBBBBBBBBBBBBBBBBBBBBBB ||\n"
> + " cccc & CCCCCCCCCCCCCCCCCCCCCCCCCC ||\n"
> + " dddd & DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD;");
> + verifyFormat("if ((aaaaaaaaaa != aaaaaaaaaaaaaaa ||\n"
> + " aaaaaaaaaaaaaaaaaaaaaaaa() >= aaaaaaaaaaaaaaaaaaaa) &&\n"
> + " aaaaaaaaaaaaaaa != aa) {\n}");
> }
>
> TEST_F(FormatTest, BreaksAfterAssignments) {
> @@ -1275,7 +1282,7 @@ TEST_F(FormatTest, DeclarationsOfMultipl
> verifyFormat("bool aaaaaaaaaaaaaaaaaaaaaaaaa =\n"
> " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(aaaaaaaaaaaaaaaa),\n"
> " bbbbbbbbbbbbbbbbbbbbbbbbb =\n"
> - " bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb(bbbbbbbbbbbbbbbb);");
> + " bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb(bbbbbbbbbbbbbbbb);");
>
> // FIXME: This is bad as we hide "d".
> verifyFormat(
> @@ -1391,6 +1398,9 @@ TEST_F(FormatTest, WrapsTemplateDeclarat
> "virtual void loooooooooooongFunction(int Param1, int Param2);");
> verifyFormat(
> "template <typename T>\n"
> + "using comment_to_xml_conversion = comment_to_xml_conversion<T, int>;");
> + verifyFormat(
> + "template <typename T>\n"
> "void f(int Paaaaaaaaaaaaaaaaaaaaaaaaaaaaaaram1,\n"
> " int Paaaaaaaaaaaaaaaaaaaaaaaaaaaaaaram2);");
> verifyFormat(
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
More information about the cfe-commits
mailing list