r174166 - Revamp of the basic layouting algorithm in clang-format.
Timur Iskhodzhanov
timurrrr at google.com
Fri Feb 1 03:16:51 PST 2013
Hi Daniel,
I'm afraid your change breaks the Win build:
Format.cpp
C:\Program Files (x86)\Microsoft Visual Studio
10.0\VC\include\utility(163): error C2440: 'initializing' : cannot
convert from 'int' to 'const
clang::format::UnwrappedLineFormatter::LineState *'
[tools\clang\lib\Format\clangFormat.vcxproj]
Conversion from integral type to pointer type requires
reinterpret_cast, C-style cast or function-style cast
C:\Program Files (x86)\Microsoft Visual Studio
10.0\VC\include\utility(247) : see reference to function template
instantiation 'std::_Pair_base<_Ty1,_Ty2>::_Pair_base<_Ty,int>(_Other1
&&,_Other2 &&)' being compiled
with
[
_Ty1=bool,
_Ty2=const clang::format::UnwrappedLineFormatter::LineState *,
_Ty=bool,
_Other1=bool,
_Other2=int
]
..\..\..\..\..\llvm\tools\clang\lib\Format\Format.cpp(598) :
see reference to function template instantiation
'std::pair<_Ty1,_Ty2>::pair<bool,int>(_Other1 &&,_Other2 &&)' being
compiled
with
[
_Ty1=bool,
_Ty2=const clang::format::UnwrappedLineFormatter::LineState *,
_Other1=bool,
_Other2=int
]
C:\Program Files (x86)\Microsoft Visual Studio
10.0\VC\include\utility(163): error C2439:
'std::_Pair_base<_Ty1,_Ty2>::second' : member could not be initialized
[tools\clang\lib\Format\clangFormat.vcxproj]
with
[
_Ty1=bool,
_Ty2=const clang::format::UnwrappedLineFormatter::LineState *
]
C:\Program Files (x86)\Microsoft Visual Studio
10.0\VC\include\utility(167) : see declaration of
'std::_Pair_base<_Ty1,_Ty2>::second'
with
[
_Ty1=bool,
_Ty2=const clang::format::UnwrappedLineFormatter::LineState *
]
--
Timur
2013/2/1 Daniel Jasper <djasper at google.com>:
> Author: djasper
> Date: Fri Feb 1 05:00:45 2013
> New Revision: 174166
>
> URL: http://llvm.org/viewvc/llvm-project?rev=174166&view=rev
> Log:
> Revamp of the basic layouting algorithm in clang-format.
>
> In order to end up with good solutions, clang-format needs to try
> "all" combinations of line breaks, evaluate them and select the
> best one. Before, we have done this using a DFS with memoization
> and cut-off conditions. However, this approach is very limited
> as shown by the huge static initializer in the attachment of
> llvm.org/PR14959.
>
> Instead, this new implementation uses a variant of Dijkstra's
> algorithm to do a prioritized BFS over the solution space.
>
> Some numbers:
> lib/Format/TokenAnnotator.cpp: 1.5s -> 0.15s
> Attachment of PR14959: 10min+ (didn't finish) -> 10s
>
> No functional changes intended.
>
> Modified:
> cfe/trunk/lib/Format/Format.cpp
>
> Modified: cfe/trunk/lib/Format/Format.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=174166&r1=174165&r2=174166&view=diff
> ==============================================================================
> --- cfe/trunk/lib/Format/Format.cpp (original)
> +++ cfe/trunk/lib/Format/Format.cpp Fri Feb 1 05:00:45 2013
> @@ -241,44 +241,16 @@ public:
> // The first token has already been indented and thus consumed.
> moveStateToNextToken(State);
>
> - // Start iterating at 1 as we have correctly formatted of Token #0 above.
> - while (State.NextToken != NULL) {
> - if (State.NextToken->Type == TT_ImplicitStringLiteral) {
> - // Calculating the column is important for aligning trailing comments.
> - // FIXME: This does not seem to happen in conjunction with escaped
> - // newlines. If it does, fix!
> - State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
> - State.NextToken->FormatTok.TokenLength;
> - State.NextToken = State.NextToken->Children.empty()
> - ? NULL : &State.NextToken->Children[0];
> - } else if (Line.Last->TotalLength <= getColumnLimit() - FirstIndent) {
> + // If everything fits on a single line, just put it there.
> + if (Line.Last->TotalLength <= getColumnLimit() - FirstIndent) {
> + while (State.NextToken != NULL) {
> addTokenToState(false, false, State);
> - } else {
> - unsigned NoBreak = calcPenalty(State, false, UINT_MAX);
> - unsigned Break = calcPenalty(State, true, NoBreak);
> - DEBUG({
> - if (Break < NoBreak)
> - llvm::errs() << "\n";
> - else
> - llvm::errs() << " ";
> - llvm::errs() << "<";
> - DebugPenalty(Break, Break < NoBreak);
> - llvm::errs() << "/";
> - DebugPenalty(NoBreak, !(Break < NoBreak));
> - llvm::errs() << "> ";
> - DebugTokenState(*State.NextToken);
> - });
> - addTokenToState(Break < NoBreak, false, State);
> - if (State.NextToken != NULL &&
> - State.NextToken->Parent->Type == TT_CtorInitializerColon) {
> - if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine &&
> - Line.Last->TotalLength > getColumnLimit() - State.Column - 1)
> - State.Stack.back().BreakAfterComma = true;
> - }
> }
> + return State.Column;
> }
> - DEBUG(llvm::errs() << "\n");
> - return State.Column;
> +
> + // Find best solution in solution space.
> + return analyzeSolutionSpace(State);
> }
>
> private:
> @@ -289,15 +261,6 @@ private:
> llvm::errs();
> }
>
> - void DebugPenalty(unsigned Penalty, bool Winner) {
> - llvm::errs().changeColor(Winner ? raw_ostream::GREEN : raw_ostream::RED);
> - if (Penalty == UINT_MAX)
> - llvm::errs() << "MAX";
> - else
> - llvm::errs() << Penalty;
> - llvm::errs().resetColor();
> - }
> -
> struct ParenState {
> ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking)
> : Indent(Indent), LastSpace(LastSpace), AssignmentColumn(0),
> @@ -421,6 +384,16 @@ private:
> assert(State.Stack.size());
> unsigned ParenLevel = State.Stack.size() - 1;
>
> + if (Current.Type == TT_ImplicitStringLiteral) {
> + State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
> + State.NextToken->FormatTok.TokenLength;
> + if (State.NextToken->Children.empty())
> + State.NextToken = NULL;
> + else
> + State.NextToken = &State.NextToken->Children[0];
> + return;
> + }
> +
> if (Newline) {
> unsigned WhitespaceStartColumn = State.Column;
> if (Current.is(tok::r_brace)) {
> @@ -604,87 +577,123 @@ private:
> return Style.ColumnLimit - (Line.InPPDirective ? 1 : 0);
> }
>
> - /// \brief Calculate the number of lines needed to format the remaining part
> - /// of the unwrapped line.
> - ///
> - /// Assumes the formatting so far has led to
> - /// the \c LineSta \p State. If \p NewLine is set, a new line will be
> - /// added after the previous token.
> + /// \brief An edge in the solution space starting from the \c LineState and
> + /// inserting a newline dependent on the \c bool.
> + typedef std::pair<bool, const LineState *> Edge;
> +
> + /// \brief An item in the prioritized BFS search queue. The \c LineState was
> + /// reached using the \c Edge.
> + typedef std::pair<LineState, Edge> QueueItem;
> +
> + /// \brief Analyze the entire solution space starting from \p InitialState.
> ///
> - /// \param StopAt is used for optimization. If we can determine that we'll
> - /// definitely need at least \p StopAt additional lines, we already know of a
> - /// better solution.
> - unsigned calcPenalty(LineState State, bool NewLine, unsigned StopAt) {
> - // We are at the end of the unwrapped line, so we don't need any more lines.
> - if (State.NextToken == NULL)
> + /// This implements a variant of Dijkstra's algorithm on the graph that spans
> + /// the solution space (\c LineStates are the nodes). The algorithm tries to
> + /// find the shortest path (the one with lowest penalty) from \p InitialState
> + /// to a state where all tokens are placed.
> + unsigned analyzeSolutionSpace(const LineState &InitialState) {
> + // Insert start element into queue.
> + std::multimap<unsigned, QueueItem> Queue;
> + Queue.insert(std::pair<unsigned, QueueItem>(
> + 0, QueueItem(InitialState, Edge(false, NULL))));
> + std::map<LineState, Edge> Seen;
> +
> + // While not empty, take first element and follow edges.
> + while (!Queue.empty()) {
> + unsigned Penalty = Queue.begin()->first;
> + QueueItem Item = Queue.begin()->second;
> + if (Item.first.NextToken == NULL)
> + break;
> + Queue.erase(Queue.begin());
> +
> + if (Seen.find(Item.first) != Seen.end())
> + continue; // State already examined with lower penalty.
> +
> + const LineState &SavedState = Seen.insert(std::pair<LineState, Edge>(
> + Item.first,
> + Edge(Item.second.first, Item.second.second))).first->first;
> +
> + addNextStateToQueue(SavedState, Penalty, /*NewLine=*/ false, Queue);
> + addNextStateToQueue(SavedState, Penalty, /*NewLine=*/ true, Queue);
> + }
> +
> + if (Queue.empty())
> + // We were unable to find a solution, do nothing.
> + // FIXME: Add diagnostic?
> return 0;
>
> - if (!NewLine && State.NextToken->MustBreakBefore)
> - return UINT_MAX;
> - if (NewLine && !State.NextToken->CanBreakBefore &&
> - !(State.NextToken->is(tok::r_brace) &&
> - State.Stack.back().BreakBeforeClosingBrace))
> - return UINT_MAX;
> - if (!NewLine && State.NextToken->is(tok::r_brace) &&
> - State.Stack.back().BreakBeforeClosingBrace)
> - return UINT_MAX;
> - if (!NewLine && State.NextToken->Parent->is(tok::semi) &&
> - State.LineContainsContinuedForLoopSection)
> - return UINT_MAX;
> - if (!NewLine && State.NextToken->Parent->is(tok::comma) &&
> - State.NextToken->Type != TT_LineComment &&
> - State.Stack.back().BreakAfterComma)
> - return UINT_MAX;
> - // Trying to insert a parameter on a new line if there are already more than
> - // one parameter on the current line is bin packing.
> - if (NewLine && State.NextToken->Parent->is(tok::comma) &&
> - State.Stack.back().HasMultiParameterLine &&
> - State.Stack.back().AvoidBinPacking)
> - return UINT_MAX;
> - if (!NewLine && (State.NextToken->Type == TT_CtorInitializerColon ||
> - (State.NextToken->Parent->ClosesTemplateDeclaration &&
> - State.Stack.size() == 1)))
> - return UINT_MAX;
> + // Reconstruct the solution.
> + // FIXME: Add debugging output.
> + Edge *CurrentEdge = &Queue.begin()->second.second;
> + while (CurrentEdge->second != NULL) {
> + LineState CurrentState = *CurrentEdge->second;
> + addTokenToState(CurrentEdge->first, false, CurrentState);
> + CurrentEdge = &Seen[*CurrentEdge->second];
> + }
>
> - unsigned CurrentPenalty = 0;
> - if (NewLine)
> - CurrentPenalty += Parameters.PenaltyIndentLevel * State.Stack.size() +
> - State.NextToken->SplitPenalty;
> + // Return the column after the last token of the solution.
> + return Queue.begin()->second.first.Column;
> + }
>
> + /// \brief Add the following state to the analysis queue \p Queue.
> + ///
> + /// Assume the current state is \p OldState and has been reached with a
> + /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
> + void addNextStateToQueue(const LineState &OldState, unsigned Penalty,
> + bool NewLine,
> + std::multimap<unsigned, QueueItem> &Queue) {
> + if (NewLine && !canBreak(OldState))
> + return;
> + if (!NewLine && mustBreak(OldState))
> + return;
> + LineState State(OldState);
> + if (NewLine)
> + Penalty += Parameters.PenaltyIndentLevel * State.Stack.size() +
> + State.NextToken->SplitPenalty;
> addTokenToState(NewLine, true, State);
> -
> - // Exceeding column limit is bad, assign penalty.
> if (State.Column > getColumnLimit()) {
> unsigned ExcessCharacters = State.Column - getColumnLimit();
> - CurrentPenalty += Parameters.PenaltyExcessCharacter * ExcessCharacters;
> + Penalty += Parameters.PenaltyExcessCharacter * ExcessCharacters;
> }
> + Queue.insert(std::pair<unsigned, QueueItem>(
> + Penalty, QueueItem(State, Edge(NewLine, &OldState))));
> + }
> +
> + /// \brief Returns \c true, if a line break after \p State is allowed.
> + bool canBreak(const LineState &State) {
> + if (!State.NextToken->CanBreakBefore &&
> + !(State.NextToken->is(tok::r_brace) &&
> + State.Stack.back().BreakBeforeClosingBrace))
> + return false;
> + // Trying to insert a parameter on a new line if there are already more than
> + // one parameter on the current line is bin packing.
> + if (State.NextToken->Parent->is(tok::comma) &&
> + State.Stack.back().HasMultiParameterLine &&
> + State.Stack.back().AvoidBinPacking)
> + return false;
> + return true;
> + }
>
> - if (StopAt <= CurrentPenalty)
> - return UINT_MAX;
> - StopAt -= CurrentPenalty;
> - StateMap::iterator I = Memory.find(State);
> - if (I != Memory.end()) {
> - // If this state has already been examined, we can safely return the
> - // previous result if we
> - // - have not hit the optimatization (and thus returned UINT_MAX) OR
> - // - are now computing for a smaller or equal StopAt.
> - unsigned SavedResult = I->second.first;
> - unsigned SavedStopAt = I->second.second;
> - if (SavedResult != UINT_MAX)
> - return SavedResult + CurrentPenalty;
> - else if (StopAt <= SavedStopAt)
> - return UINT_MAX;
> - }
> -
> - unsigned NoBreak = calcPenalty(State, false, StopAt);
> - unsigned WithBreak = calcPenalty(State, true, std::min(StopAt, NoBreak));
> - unsigned Result = std::min(NoBreak, WithBreak);
> -
> - // We have to store 'Result' without adding 'CurrentPenalty' as the latter
> - // can depend on 'NewLine'.
> - Memory[State] = std::pair<unsigned, unsigned>(Result, StopAt);
>
> - return Result == UINT_MAX ? UINT_MAX : Result + CurrentPenalty;
> + /// \brief Returns \c true, if a line break after \p State is mandatory.
> + bool mustBreak(const LineState &State) {
> + if (State.NextToken->MustBreakBefore)
> + return true;
> + if (State.NextToken->is(tok::r_brace) &&
> + State.Stack.back().BreakBeforeClosingBrace)
> + return true;
> + if (State.NextToken->Parent->is(tok::semi) &&
> + State.LineContainsContinuedForLoopSection)
> + return true;
> + if (State.NextToken->Parent->is(tok::comma) &&
> + State.Stack.back().BreakAfterComma &&
> + State.NextToken->Type != TT_LineComment)
> + return true;
> + if ((State.NextToken->Type == TT_CtorInitializerColon ||
> + (State.NextToken->Parent->ClosesTemplateDeclaration &&
> + State.Stack.size() == 1)))
> + return true;
> + return false;
> }
>
> FormatStyle Style;
> @@ -694,10 +703,6 @@ private:
> const AnnotatedToken &RootToken;
> WhitespaceManager &Whitespaces;
>
> - // A map from an indent state to a pair (Result, Used-StopAt).
> - typedef std::map<LineState, std::pair<unsigned, unsigned> > StateMap;
> - StateMap Memory;
> -
> OptimizationParameters Parameters;
> };
>
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
--
Timur Iskhodzhanov,
Google Russia
More information about the cfe-commits
mailing list