r174714 - Implement a tiny expression parser to improve formatting decisions.
Daniel Jasper
djasper at google.com
Fri Feb 8 07:28:42 PST 2013
Author: djasper
Date: Fri Feb 8 09:28:42 2013
New Revision: 174714
URL: http://llvm.org/viewvc/llvm-project?rev=174714&view=rev
Log:
Implement a tiny expression parser to improve formatting decisions.
With this patch, the formatter introduces 'fake' parenthesis according
to the operator precedence of binary operators.
Before:
return aaaa & AAAAAAAAAAAAAAAAAAAAAAAAAAAAA || bbbb &
BBBBBBBBBBBBBBBBBBBBBBBBBBBBB || cccc & CCCCCCCCCCCCCCCCCCCCCCCCCC ||
dddd & DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD;
f(aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaaa &&
aaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa);
After:
return aaaa & AAAAAAAAAAAAAAAAAAAAAAAAAAAAA ||
bbbb & BBBBBBBBBBBBBBBBBBBBBBBBBBBBB ||
cccc & CCCCCCCCCCCCCCCCCCCCCCCCCC ||
dddd & DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD;
f(aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa,
aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa,
aaaaaaaaaaaaaaaaaaaa && aaaaaaaaaaaaaaaaaaaa);
Future improvements:
- Get rid of some of the hacky ways to nicely format certain constructs.
- Merge this parser and the AnnotatingParser as we now have several parsers
that analyze (), [], etc.
Modified:
cfe/trunk/lib/Format/Format.cpp
cfe/trunk/lib/Format/TokenAnnotator.cpp
cfe/trunk/lib/Format/TokenAnnotator.h
cfe/trunk/unittests/Format/FormatTest.cpp
Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=174714&r1=174713&r2=174714&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Fri Feb 8 09:28:42 2013
@@ -205,7 +205,7 @@ private:
while (I != E) {
unsigned Spaces = I->Spaces + Column - I->MinColumn;
storeReplacement(I->Tok, std::string(I->NewLines, '\n') +
- std::string(Spaces, ' '));
+ std::string(Spaces, ' '));
++I;
}
}
@@ -251,6 +251,7 @@ public:
/*HasMultiParameterLine=*/ false));
State.VariablePos = 0;
State.LineContainsContinuedForLoopSection = false;
+ State.ParenLevel = 0;
DEBUG({
DebugTokenState(*State.NextToken);
@@ -287,8 +288,8 @@ private:
struct ParenState {
ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
bool HasMultiParameterLine)
- : Indent(Indent), LastSpace(LastSpace), AssignmentColumn(0),
- FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0),
+ : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0),
+ BreakBeforeClosingBrace(false), QuestionColumn(0),
AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
HasMultiParameterLine(HasMultiParameterLine), ColonPos(0) {
}
@@ -304,9 +305,6 @@ private:
/// OtherParameter));
unsigned LastSpace;
- /// \brief This is the column of the first token after an assignment.
- unsigned AssignmentColumn;
-
/// \brief The position the first "<<" operator encountered on each level.
///
/// Used to align "<<" operators. 0 if no such operator has been encountered
@@ -342,8 +340,6 @@ private:
return Indent < Other.Indent;
if (LastSpace != Other.LastSpace)
return LastSpace < Other.LastSpace;
- if (AssignmentColumn != Other.AssignmentColumn)
- return AssignmentColumn < Other.AssignmentColumn;
if (FirstLessLess != Other.FirstLessLess)
return FirstLessLess < Other.FirstLessLess;
if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
@@ -380,6 +376,9 @@ private:
/// \brief \c true if this line contains a continued for-loop section.
bool LineContainsContinuedForLoopSection;
+ /// \brief The level of nesting inside (), [], <> and {}.
+ unsigned ParenLevel;
+
/// \brief A stack keeping track of properties applying to parenthesis
/// levels.
std::vector<ParenState> Stack;
@@ -395,6 +394,8 @@ private:
if (Other.LineContainsContinuedForLoopSection !=
LineContainsContinuedForLoopSection)
return LineContainsContinuedForLoopSection;
+ if (Other.ParenLevel != ParenLevel)
+ return Other.ParenLevel < ParenLevel;
return Other.Stack < Stack;
}
};
@@ -411,7 +412,6 @@ private:
const AnnotatedToken &Current = *State.NextToken;
const AnnotatedToken &Previous = *State.NextToken->Parent;
assert(State.Stack.size());
- unsigned ParenLevel = State.Stack.size() - 1;
if (Current.Type == TT_ImplicitStringLiteral) {
State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
@@ -431,9 +431,9 @@ private:
Previous.is(tok::string_literal)) {
State.Column = State.Column - Previous.FormatTok.TokenLength;
} else if (Current.is(tok::lessless) &&
- State.Stack[ParenLevel].FirstLessLess != 0) {
- State.Column = State.Stack[ParenLevel].FirstLessLess;
- } else if (ParenLevel != 0 &&
+ State.Stack.back().FirstLessLess != 0) {
+ State.Column = State.Stack.back().FirstLessLess;
+ } else if (State.ParenLevel != 0 &&
(Previous.is(tok::equal) || Previous.is(tok::coloncolon) ||
Current.is(tok::period) || Current.is(tok::arrow) ||
Current.is(tok::question))) {
@@ -445,15 +445,12 @@ private:
} else if (Current.Type == TT_ConditionalExpr) {
State.Column = State.Stack.back().QuestionColumn;
} else if (Previous.is(tok::comma) && State.VariablePos != 0 &&
- ((RootToken.is(tok::kw_for) && ParenLevel == 1) ||
- ParenLevel == 0)) {
+ ((RootToken.is(tok::kw_for) && State.ParenLevel == 1) ||
+ State.ParenLevel == 0)) {
State.Column = State.VariablePos;
} else if (State.NextToken->Parent->ClosesTemplateDeclaration ||
Current.Type == TT_StartOfName) {
- State.Column = State.Stack[ParenLevel].Indent - 4;
- } else if (Previous.Type == TT_BinaryOperator &&
- State.Stack.back().AssignmentColumn != 0) {
- State.Column = State.Stack.back().AssignmentColumn;
+ State.Column = State.Stack.back().Indent - 4;
} else if (Current.Type == TT_ObjCSelectorName) {
if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) {
State.Column =
@@ -466,7 +463,7 @@ private:
} else if (Previous.Type == TT_ObjCMethodExpr) {
State.Column = State.Stack.back().Indent + 4;
} else {
- State.Column = State.Stack[ParenLevel].Indent;
+ State.Column = State.Stack.back().Indent;
}
if (Previous.is(tok::comma) && !State.Stack.back().AvoidBinPacking)
@@ -484,12 +481,12 @@ private:
WhitespaceStartColumn, Style);
}
- State.Stack[ParenLevel].LastSpace = State.Column;
+ State.Stack.back().LastSpace = State.Column;
if (Current.is(tok::colon) && Current.Type != TT_ConditionalExpr)
- State.Stack[ParenLevel].Indent += 2;
+ State.Stack.back().Indent += 2;
} else {
if (Current.is(tok::equal) &&
- (RootToken.is(tok::kw_for) || ParenLevel == 0))
+ (RootToken.is(tok::kw_for) || State.ParenLevel == 0))
State.VariablePos = State.Column - Previous.FormatTok.TokenLength;
unsigned Spaces = State.NextToken->SpaceRequiredBefore ? 1 : 0;
@@ -510,26 +507,19 @@ private:
State.Column + Spaces + Current.FormatTok.TokenLength;
}
- // FIXME: Do we need to do this for assignments nested in other
- // expressions?
- if (RootToken.isNot(tok::kw_for) && ParenLevel == 0 &&
- !isTrailingComment(Current) &&
- (getPrecedence(Previous) == prec::Assignment ||
- Previous.is(tok::kw_return)))
- State.Stack.back().AssignmentColumn = State.Column + Spaces;
if (Current.Type != TT_LineComment &&
(Previous.is(tok::l_paren) || Previous.is(tok::l_brace) ||
State.NextToken->Parent->Type == TT_TemplateOpener))
- State.Stack[ParenLevel].Indent = State.Column + Spaces;
+ State.Stack.back().Indent = State.Column + Spaces;
if (Previous.is(tok::comma) && !isTrailingComment(Current))
- State.Stack[ParenLevel].HasMultiParameterLine = true;
+ State.Stack.back().HasMultiParameterLine = true;
State.Column += Spaces;
if (Current.is(tok::l_paren) && Previous.is(tok::kw_if))
// Treat the condition inside an if as if it was a second function
// parameter, i.e. let nested calls have an indent of 4.
State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
- else if (Previous.is(tok::comma) && ParenLevel != 0)
+ else if (Previous.is(tok::comma) && State.ParenLevel != 0)
// Top-level spaces are exempt as that mostly leads to better results.
State.Stack.back().LastSpace = State.Column;
else if ((Previous.Type == TT_BinaryOperator ||
@@ -551,7 +541,7 @@ private:
State.Stack.back().BreakBeforeClosingBrace = true;
if (State.Stack.back().AvoidBinPacking && Newline &&
- (Line.First.isNot(tok::kw_for) || ParenLevel != 1)) {
+ (Line.First.isNot(tok::kw_for) || State.ParenLevel != 1)) {
// If we are breaking after '(', '{', '<', this is not bin packing unless
// AllowAllParametersOfDeclarationOnNextLine is false.
if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace) &&
@@ -587,6 +577,13 @@ private:
State.Stack.back().BreakBeforeParameter = true;
}
+ // Insert scopes created by fake parenthesis.
+ for (unsigned i = 0, e = Current.FakeLParens; i != e; ++i) {
+ ParenState NewParenState = State.Stack.back();
+ NewParenState.Indent = std::max(State.Column, State.Stack.back().Indent);
+ State.Stack.push_back(NewParenState);
+ }
+
// If we encounter an opening (, [, { or <, we add a level to our stacks to
// prepare for the following tokens.
if (Current.is(tok::l_paren) || Current.is(tok::l_square) ||
@@ -604,6 +601,7 @@ private:
State.Stack.push_back(
ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking,
State.Stack.back().HasMultiParameterLine));
+ ++State.ParenLevel;
}
// If this '[' opens an ObjC call, determine whether all parameters fit into
@@ -620,6 +618,12 @@ private:
(Current.is(tok::r_brace) && State.NextToken != &RootToken) ||
State.NextToken->Type == TT_TemplateCloser) {
State.Stack.pop_back();
+ --State.ParenLevel;
+ }
+
+ // Remove scopes created by fake parenthesis.
+ for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) {
+ State.Stack.pop_back();
}
if (State.NextToken->Children.empty())
@@ -760,7 +764,7 @@ private:
return true;
if ((State.NextToken->Type == TT_CtorInitializerColon ||
(State.NextToken->Parent->ClosesTemplateDeclaration &&
- State.Stack.size() == 1)))
+ State.ParenLevel == 0)))
return true;
return false;
}
@@ -885,8 +889,9 @@ public:
++CountBoundToType;
}
- if (Tok->Type == TT_TemplateCloser && Tok->Parent->Type ==
- TT_TemplateCloser && Tok->FormatTok.WhiteSpaceLength == 0)
+ if (Tok->Type == TT_TemplateCloser &&
+ Tok->Parent->Type == TT_TemplateCloser &&
+ Tok->FormatTok.WhiteSpaceLength == 0)
HasCpp03IncompatibleFormat = true;
Tok = &Tok->Children[0];
}
Modified: cfe/trunk/lib/Format/TokenAnnotator.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.cpp?rev=174714&r1=174713&r2=174714&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.cpp (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.cpp Fri Feb 8 09:28:42 2013
@@ -52,6 +52,12 @@ static const AnnotatedToken *getPrevious
return getPreviousToken(const_cast<AnnotatedToken &>(Tok));
}
+static bool isTrailingComment(AnnotatedToken *Tok) {
+ return Tok != NULL && Tok->is(tok::comment) &&
+ (Tok->Children.empty() ||
+ Tok->Children[0].FormatTok.NewlinesBefore > 0);
+}
+
// Returns the next token ignoring comments.
static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
if (Tok.Children.empty())
@@ -681,12 +687,91 @@ private:
bool KeywordVirtualFound;
};
+/// \brief Parses binary expressions by inserting fake parenthesis based on
+/// operator precedence.
+class ExpressionParser {
+public:
+ ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {}
+
+ /// \brief Parse expressions with the given operatore precedence.
+ void parse(unsigned Precedence = prec::Unknown) {
+ if (Precedence > prec::PointerToMember || Current == NULL)
+ return;
+
+ // Skip over "return" until we can properly parse it.
+ if (Current->is(tok::kw_return))
+ next();
+
+ // Eagerly consume trailing comments.
+ while (isTrailingComment(Current)) {
+ next();
+ }
+
+ AnnotatedToken *Start = Current;
+ bool OperatorFound = false;
+
+ while (Current != NULL) {
+ // Consume operators with higher precedence.
+ parse(Precedence + 1);
+
+ // At the end of the line or when an operator with higher precedence is
+ // found, insert fake parenthesis and return.
+ if (Current == NULL || Current->is(tok::semi) || closesScope(*Current) ||
+ ((Current->Type == TT_BinaryOperator || Current->is(tok::comma)) &&
+ getPrecedence(*Current) < Precedence)) {
+ if (OperatorFound) {
+ ++Start->FakeLParens;
+ if (Current != NULL)
+ ++Current->FakeRParens;
+ }
+ return;
+ }
+
+ // Consume scopes: (), [], <> and {}
+ if (opensScope(*Current)) {
+ while (Current != NULL && !closesScope(*Current)) {
+ next();
+ parse();
+ }
+ next();
+ } else {
+ // Operator found.
+ if (getPrecedence(*Current) == Precedence)
+ OperatorFound = true;
+
+ next();
+ }
+ }
+ }
+
+private:
+ void next() {
+ if (Current != NULL)
+ Current = Current->Children.empty() ? NULL : &Current->Children[0];
+ }
+
+ bool closesScope(const AnnotatedToken &Tok) {
+ return Current->is(tok::r_paren) || Current->Type == TT_TemplateCloser ||
+ Current->is(tok::r_brace) || Current->is(tok::r_square);
+ }
+
+ bool opensScope(const AnnotatedToken &Tok) {
+ return Current->is(tok::l_paren) || Current->Type == TT_TemplateOpener ||
+ Current->is(tok::l_brace) || Current->is(tok::l_square);
+ }
+
+ AnnotatedToken *Current;
+};
+
void TokenAnnotator::annotate(AnnotatedLine &Line) {
AnnotatingParser Parser(SourceMgr, Lex, Line);
Line.Type = Parser.parseLine();
if (Line.Type == LT_Invalid)
return;
+ ExpressionParser ExprParser(Line);
+ ExprParser.parse();
+
if (Line.First.Type == TT_ObjCMethodSpecifier)
Line.Type = LT_ObjCMethodDecl;
else if (Line.First.Type == TT_ObjCDecl)
@@ -712,8 +797,7 @@ void TokenAnnotator::calculateFormatting
Current->MustBreakBefore = true;
} else if (Current->Type == TT_LineComment) {
Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
- } else if ((Current->Parent->is(tok::comment) &&
- Current->FormatTok.NewlinesBefore > 0) ||
+ } else if (isTrailingComment(Current->Parent) ||
(Current->is(tok::string_literal) &&
Current->Parent->is(tok::string_literal))) {
Current->MustBreakBefore = true;
@@ -839,8 +923,7 @@ bool TokenAnnotator::spaceRequiredBetwee
(Left.isNot(tok::star) && Left.isNot(tok::amp) &&
!Style.PointerBindsToType);
if (Left.is(tok::amp) || Left.is(tok::star))
- return Right.FormatTok.Tok.isLiteral() ||
- Style.PointerBindsToType;
+ return Right.FormatTok.Tok.isLiteral() || Style.PointerBindsToType;
if (Right.is(tok::star) && Left.is(tok::l_paren))
return false;
if (Left.is(tok::l_square) || Right.is(tok::r_square))
@@ -911,8 +994,9 @@ bool TokenAnnotator::spaceRequiredBefore
(Tok.Parent->isNot(tok::colon) ||
Tok.Parent->Type != TT_ObjCMethodExpr);
if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
- return Tok.Type == TT_TemplateCloser && Tok.Parent->Type ==
- TT_TemplateCloser && Style.Standard != FormatStyle::LS_Cpp11;
+ return Tok.Type == TT_TemplateCloser &&
+ Tok.Parent->Type == TT_TemplateCloser &&
+ Style.Standard != FormatStyle::LS_Cpp11;
}
if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
return true;
Modified: cfe/trunk/lib/Format/TokenAnnotator.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/TokenAnnotator.h?rev=174714&r1=174713&r2=174714&view=diff
==============================================================================
--- cfe/trunk/lib/Format/TokenAnnotator.h (original)
+++ cfe/trunk/lib/Format/TokenAnnotator.h Fri Feb 8 09:28:42 2013
@@ -71,7 +71,8 @@ public:
CanBreakBefore(false), MustBreakBefore(false),
ClosesTemplateDeclaration(false), MatchingParen(NULL),
ParameterCount(1), BindingStrength(0), SplitPenalty(0),
- LongestObjCSelectorName(0), Parent(NULL) {
+ LongestObjCSelectorName(0), Parent(NULL), FakeLParens(0),
+ FakeRParens(0) {
}
bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
@@ -118,6 +119,11 @@ public:
std::vector<AnnotatedToken> Children;
AnnotatedToken *Parent;
+ /// \brief Insert this many fake ( before this token for correct indentation.
+ unsigned FakeLParens;
+ /// \brief Insert this many fake ) before this token for correct indentation.
+ unsigned FakeRParens;
+
const AnnotatedToken *getPreviousNoneComment() const {
AnnotatedToken *Tok = Parent;
while (Tok != NULL && Tok->is(tok::comment))
Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=174714&r1=174713&r2=174714&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Fri Feb 8 09:28:42 2013
@@ -1192,6 +1192,13 @@ TEST_F(FormatTest, BreaksAccordingToOper
verifyFormat(
"if ((aaaaaaaaaaaaaaaaaaaaaaaaa || bbbbbbbbbbbbbbbbbbbbbbbbb) &&\n"
" ccccccccccccccccccccccccc) {\n}");
+ verifyFormat("return aaaa & AAAAAAAAAAAAAAAAAAAAAAAAAAAAA ||\n"
+ " bbbb & BBBBBBBBBBBBBBBBBBBBBBBBBBBBB ||\n"
+ " cccc & CCCCCCCCCCCCCCCCCCCCCCCCCC ||\n"
+ " dddd & DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD;");
+ verifyFormat("if ((aaaaaaaaaa != aaaaaaaaaaaaaaa ||\n"
+ " aaaaaaaaaaaaaaaaaaaaaaaa() >= aaaaaaaaaaaaaaaaaaaa) &&\n"
+ " aaaaaaaaaaaaaaa != aa) {\n}");
}
TEST_F(FormatTest, BreaksAfterAssignments) {
@@ -1275,7 +1282,7 @@ TEST_F(FormatTest, DeclarationsOfMultipl
verifyFormat("bool aaaaaaaaaaaaaaaaaaaaaaaaa =\n"
" aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(aaaaaaaaaaaaaaaa),\n"
" bbbbbbbbbbbbbbbbbbbbbbbbb =\n"
- " bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb(bbbbbbbbbbbbbbbb);");
+ " bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb(bbbbbbbbbbbbbbbb);");
// FIXME: This is bad as we hide "d".
verifyFormat(
@@ -1391,6 +1398,9 @@ TEST_F(FormatTest, WrapsTemplateDeclarat
"virtual void loooooooooooongFunction(int Param1, int Param2);");
verifyFormat(
"template <typename T>\n"
+ "using comment_to_xml_conversion = comment_to_xml_conversion<T, int>;");
+ verifyFormat(
+ "template <typename T>\n"
"void f(int Paaaaaaaaaaaaaaaaaaaaaaaaaaaaaaram1,\n"
" int Paaaaaaaaaaaaaaaaaaaaaaaaaaaaaaram2);");
verifyFormat(
More information about the cfe-commits
mailing list