r174166 - Revamp of the basic layouting algorithm in clang-format.
Timur Iskhodzhanov
timurrrr at google.com
Fri Feb 1 03:52:56 PST 2013
Thanks!
2013/2/1 Daniel Jasper <djasper at google.com>:
> Sorry, I did not know about that MSVC bug. Workaround submitted in r174169.
>
>
> On Fri, Feb 1, 2013 at 12:16 PM, Timur Iskhodzhanov <timurrrr at google.com>
> wrote:
>>
>> Hi Daniel,
>>
>> I'm afraid your change breaks the Win build:
>>
>> Format.cpp
>> C:\Program Files (x86)\Microsoft Visual Studio
>> 10.0\VC\include\utility(163): error C2440: 'initializing' : cannot
>> convert from 'int' to 'const
>> clang::format::UnwrappedLineFormatter::LineState *'
>> [tools\clang\lib\Format\clangFormat.vcxproj]
>> Conversion from integral type to pointer type requires
>> reinterpret_cast, C-style cast or function-style cast
>> C:\Program Files (x86)\Microsoft Visual Studio
>> 10.0\VC\include\utility(247) : see reference to function template
>> instantiation 'std::_Pair_base<_Ty1,_Ty2>::_Pair_base<_Ty,int>(_Other1
>> &&,_Other2 &&)' being compiled
>> with
>> [
>> _Ty1=bool,
>> _Ty2=const clang::format::UnwrappedLineFormatter::LineState
>> *,
>> _Ty=bool,
>> _Other1=bool,
>> _Other2=int
>> ]
>> ..\..\..\..\..\llvm\tools\clang\lib\Format\Format.cpp(598) :
>> see reference to function template instantiation
>> 'std::pair<_Ty1,_Ty2>::pair<bool,int>(_Other1 &&,_Other2 &&)' being
>> compiled
>> with
>> [
>> _Ty1=bool,
>> _Ty2=const clang::format::UnwrappedLineFormatter::LineState
>> *,
>> _Other1=bool,
>> _Other2=int
>> ]
>>
>> C:\Program Files (x86)\Microsoft Visual Studio
>> 10.0\VC\include\utility(163): error C2439:
>> 'std::_Pair_base<_Ty1,_Ty2>::second' : member could not be initialized
>> [tools\clang\lib\Format\clangFormat.vcxproj]
>> with
>> [
>> _Ty1=bool,
>> _Ty2=const clang::format::UnwrappedLineFormatter::LineState
>> *
>> ]
>> C:\Program Files (x86)\Microsoft Visual Studio
>> 10.0\VC\include\utility(167) : see declaration of
>> 'std::_Pair_base<_Ty1,_Ty2>::second'
>> with
>> [
>> _Ty1=bool,
>> _Ty2=const clang::format::UnwrappedLineFormatter::LineState
>> *
>> ]
>>
>> --
>> Timur
>>
>> 2013/2/1 Daniel Jasper <djasper at google.com>:
>> > Author: djasper
>> > Date: Fri Feb 1 05:00:45 2013
>> > New Revision: 174166
>> >
>> > URL: http://llvm.org/viewvc/llvm-project?rev=174166&view=rev
>> > Log:
>> > Revamp of the basic layouting algorithm in clang-format.
>> >
>> > In order to end up with good solutions, clang-format needs to try
>> > "all" combinations of line breaks, evaluate them and select the
>> > best one. Before, we have done this using a DFS with memoization
>> > and cut-off conditions. However, this approach is very limited
>> > as shown by the huge static initializer in the attachment of
>> > llvm.org/PR14959.
>> >
>> > Instead, this new implementation uses a variant of Dijkstra's
>> > algorithm to do a prioritized BFS over the solution space.
>> >
>> > Some numbers:
>> > lib/Format/TokenAnnotator.cpp: 1.5s -> 0.15s
>> > Attachment of PR14959: 10min+ (didn't finish) -> 10s
>> >
>> > No functional changes intended.
>> >
>> > Modified:
>> > cfe/trunk/lib/Format/Format.cpp
>> >
>> > Modified: cfe/trunk/lib/Format/Format.cpp
>> > URL:
>> > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=174166&r1=174165&r2=174166&view=diff
>> >
>> > ==============================================================================
>> > --- cfe/trunk/lib/Format/Format.cpp (original)
>> > +++ cfe/trunk/lib/Format/Format.cpp Fri Feb 1 05:00:45 2013
>> > @@ -241,44 +241,16 @@ public:
>> > // The first token has already been indented and thus consumed.
>> > moveStateToNextToken(State);
>> >
>> > - // Start iterating at 1 as we have correctly formatted of Token #0
>> > above.
>> > - while (State.NextToken != NULL) {
>> > - if (State.NextToken->Type == TT_ImplicitStringLiteral) {
>> > - // Calculating the column is important for aligning trailing
>> > comments.
>> > - // FIXME: This does not seem to happen in conjunction with
>> > escaped
>> > - // newlines. If it does, fix!
>> > - State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
>> > - State.NextToken->FormatTok.TokenLength;
>> > - State.NextToken = State.NextToken->Children.empty()
>> > - ? NULL : &State.NextToken->Children[0];
>> > - } else if (Line.Last->TotalLength <= getColumnLimit() -
>> > FirstIndent) {
>> > + // If everything fits on a single line, just put it there.
>> > + if (Line.Last->TotalLength <= getColumnLimit() - FirstIndent) {
>> > + while (State.NextToken != NULL) {
>> > addTokenToState(false, false, State);
>> > - } else {
>> > - unsigned NoBreak = calcPenalty(State, false, UINT_MAX);
>> > - unsigned Break = calcPenalty(State, true, NoBreak);
>> > - DEBUG({
>> > - if (Break < NoBreak)
>> > - llvm::errs() << "\n";
>> > - else
>> > - llvm::errs() << " ";
>> > - llvm::errs() << "<";
>> > - DebugPenalty(Break, Break < NoBreak);
>> > - llvm::errs() << "/";
>> > - DebugPenalty(NoBreak, !(Break < NoBreak));
>> > - llvm::errs() << "> ";
>> > - DebugTokenState(*State.NextToken);
>> > - });
>> > - addTokenToState(Break < NoBreak, false, State);
>> > - if (State.NextToken != NULL &&
>> > - State.NextToken->Parent->Type == TT_CtorInitializerColon) {
>> > - if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine &&
>> > - Line.Last->TotalLength > getColumnLimit() - State.Column
>> > - 1)
>> > - State.Stack.back().BreakAfterComma = true;
>> > - }
>> > }
>> > + return State.Column;
>> > }
>> > - DEBUG(llvm::errs() << "\n");
>> > - return State.Column;
>> > +
>> > + // Find best solution in solution space.
>> > + return analyzeSolutionSpace(State);
>> > }
>> >
>> > private:
>> > @@ -289,15 +261,6 @@ private:
>> > llvm::errs();
>> > }
>> >
>> > - void DebugPenalty(unsigned Penalty, bool Winner) {
>> > - llvm::errs().changeColor(Winner ? raw_ostream::GREEN :
>> > raw_ostream::RED);
>> > - if (Penalty == UINT_MAX)
>> > - llvm::errs() << "MAX";
>> > - else
>> > - llvm::errs() << Penalty;
>> > - llvm::errs().resetColor();
>> > - }
>> > -
>> > struct ParenState {
>> > ParenState(unsigned Indent, unsigned LastSpace, bool
>> > AvoidBinPacking)
>> > : Indent(Indent), LastSpace(LastSpace), AssignmentColumn(0),
>> > @@ -421,6 +384,16 @@ private:
>> > assert(State.Stack.size());
>> > unsigned ParenLevel = State.Stack.size() - 1;
>> >
>> > + if (Current.Type == TT_ImplicitStringLiteral) {
>> > + State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
>> > + State.NextToken->FormatTok.TokenLength;
>> > + if (State.NextToken->Children.empty())
>> > + State.NextToken = NULL;
>> > + else
>> > + State.NextToken = &State.NextToken->Children[0];
>> > + return;
>> > + }
>> > +
>> > if (Newline) {
>> > unsigned WhitespaceStartColumn = State.Column;
>> > if (Current.is(tok::r_brace)) {
>> > @@ -604,87 +577,123 @@ private:
>> > return Style.ColumnLimit - (Line.InPPDirective ? 1 : 0);
>> > }
>> >
>> > - /// \brief Calculate the number of lines needed to format the
>> > remaining part
>> > - /// of the unwrapped line.
>> > - ///
>> > - /// Assumes the formatting so far has led to
>> > - /// the \c LineSta \p State. If \p NewLine is set, a new line will be
>> > - /// added after the previous token.
>> > + /// \brief An edge in the solution space starting from the \c
>> > LineState and
>> > + /// inserting a newline dependent on the \c bool.
>> > + typedef std::pair<bool, const LineState *> Edge;
>> > +
>> > + /// \brief An item in the prioritized BFS search queue. The \c
>> > LineState was
>> > + /// reached using the \c Edge.
>> > + typedef std::pair<LineState, Edge> QueueItem;
>> > +
>> > + /// \brief Analyze the entire solution space starting from \p
>> > InitialState.
>> > ///
>> > - /// \param StopAt is used for optimization. If we can determine that
>> > we'll
>> > - /// definitely need at least \p StopAt additional lines, we already
>> > know of a
>> > - /// better solution.
>> > - unsigned calcPenalty(LineState State, bool NewLine, unsigned StopAt)
>> > {
>> > - // We are at the end of the unwrapped line, so we don't need any
>> > more lines.
>> > - if (State.NextToken == NULL)
>> > + /// This implements a variant of Dijkstra's algorithm on the graph
>> > that spans
>> > + /// the solution space (\c LineStates are the nodes). The algorithm
>> > tries to
>> > + /// find the shortest path (the one with lowest penalty) from \p
>> > InitialState
>> > + /// to a state where all tokens are placed.
>> > + unsigned analyzeSolutionSpace(const LineState &InitialState) {
>> > + // Insert start element into queue.
>> > + std::multimap<unsigned, QueueItem> Queue;
>> > + Queue.insert(std::pair<unsigned, QueueItem>(
>> > + 0, QueueItem(InitialState, Edge(false, NULL))));
>> > + std::map<LineState, Edge> Seen;
>> > +
>> > + // While not empty, take first element and follow edges.
>> > + while (!Queue.empty()) {
>> > + unsigned Penalty = Queue.begin()->first;
>> > + QueueItem Item = Queue.begin()->second;
>> > + if (Item.first.NextToken == NULL)
>> > + break;
>> > + Queue.erase(Queue.begin());
>> > +
>> > + if (Seen.find(Item.first) != Seen.end())
>> > + continue; // State already examined with lower penalty.
>> > +
>> > + const LineState &SavedState = Seen.insert(std::pair<LineState,
>> > Edge>(
>> > + Item.first,
>> > + Edge(Item.second.first, Item.second.second))).first->first;
>> > +
>> > + addNextStateToQueue(SavedState, Penalty, /*NewLine=*/ false,
>> > Queue);
>> > + addNextStateToQueue(SavedState, Penalty, /*NewLine=*/ true,
>> > Queue);
>> > + }
>> > +
>> > + if (Queue.empty())
>> > + // We were unable to find a solution, do nothing.
>> > + // FIXME: Add diagnostic?
>> > return 0;
>> >
>> > - if (!NewLine && State.NextToken->MustBreakBefore)
>> > - return UINT_MAX;
>> > - if (NewLine && !State.NextToken->CanBreakBefore &&
>> > - !(State.NextToken->is(tok::r_brace) &&
>> > - State.Stack.back().BreakBeforeClosingBrace))
>> > - return UINT_MAX;
>> > - if (!NewLine && State.NextToken->is(tok::r_brace) &&
>> > - State.Stack.back().BreakBeforeClosingBrace)
>> > - return UINT_MAX;
>> > - if (!NewLine && State.NextToken->Parent->is(tok::semi) &&
>> > - State.LineContainsContinuedForLoopSection)
>> > - return UINT_MAX;
>> > - if (!NewLine && State.NextToken->Parent->is(tok::comma) &&
>> > - State.NextToken->Type != TT_LineComment &&
>> > - State.Stack.back().BreakAfterComma)
>> > - return UINT_MAX;
>> > - // Trying to insert a parameter on a new line if there are already
>> > more than
>> > - // one parameter on the current line is bin packing.
>> > - if (NewLine && State.NextToken->Parent->is(tok::comma) &&
>> > - State.Stack.back().HasMultiParameterLine &&
>> > - State.Stack.back().AvoidBinPacking)
>> > - return UINT_MAX;
>> > - if (!NewLine && (State.NextToken->Type == TT_CtorInitializerColon
>> > ||
>> > -
>> > (State.NextToken->Parent->ClosesTemplateDeclaration &&
>> > - State.Stack.size() == 1)))
>> > - return UINT_MAX;
>> > + // Reconstruct the solution.
>> > + // FIXME: Add debugging output.
>> > + Edge *CurrentEdge = &Queue.begin()->second.second;
>> > + while (CurrentEdge->second != NULL) {
>> > + LineState CurrentState = *CurrentEdge->second;
>> > + addTokenToState(CurrentEdge->first, false, CurrentState);
>> > + CurrentEdge = &Seen[*CurrentEdge->second];
>> > + }
>> >
>> > - unsigned CurrentPenalty = 0;
>> > - if (NewLine)
>> > - CurrentPenalty += Parameters.PenaltyIndentLevel *
>> > State.Stack.size() +
>> > - State.NextToken->SplitPenalty;
>> > + // Return the column after the last token of the solution.
>> > + return Queue.begin()->second.first.Column;
>> > + }
>> >
>> > + /// \brief Add the following state to the analysis queue \p Queue.
>> > + ///
>> > + /// Assume the current state is \p OldState and has been reached with
>> > a
>> > + /// penalty of \p Penalty. Insert a line break if \p NewLine is \c
>> > true.
>> > + void addNextStateToQueue(const LineState &OldState, unsigned Penalty,
>> > + bool NewLine,
>> > + std::multimap<unsigned, QueueItem> &Queue) {
>> > + if (NewLine && !canBreak(OldState))
>> > + return;
>> > + if (!NewLine && mustBreak(OldState))
>> > + return;
>> > + LineState State(OldState);
>> > + if (NewLine)
>> > + Penalty += Parameters.PenaltyIndentLevel * State.Stack.size() +
>> > + State.NextToken->SplitPenalty;
>> > addTokenToState(NewLine, true, State);
>> > -
>> > - // Exceeding column limit is bad, assign penalty.
>> > if (State.Column > getColumnLimit()) {
>> > unsigned ExcessCharacters = State.Column - getColumnLimit();
>> > - CurrentPenalty += Parameters.PenaltyExcessCharacter *
>> > ExcessCharacters;
>> > + Penalty += Parameters.PenaltyExcessCharacter * ExcessCharacters;
>> > }
>> > + Queue.insert(std::pair<unsigned, QueueItem>(
>> > + Penalty, QueueItem(State, Edge(NewLine, &OldState))));
>> > + }
>> > +
>> > + /// \brief Returns \c true, if a line break after \p State is
>> > allowed.
>> > + bool canBreak(const LineState &State) {
>> > + if (!State.NextToken->CanBreakBefore &&
>> > + !(State.NextToken->is(tok::r_brace) &&
>> > + State.Stack.back().BreakBeforeClosingBrace))
>> > + return false;
>> > + // Trying to insert a parameter on a new line if there are already
>> > more than
>> > + // one parameter on the current line is bin packing.
>> > + if (State.NextToken->Parent->is(tok::comma) &&
>> > + State.Stack.back().HasMultiParameterLine &&
>> > + State.Stack.back().AvoidBinPacking)
>> > + return false;
>> > + return true;
>> > + }
>> >
>> > - if (StopAt <= CurrentPenalty)
>> > - return UINT_MAX;
>> > - StopAt -= CurrentPenalty;
>> > - StateMap::iterator I = Memory.find(State);
>> > - if (I != Memory.end()) {
>> > - // If this state has already been examined, we can safely return
>> > the
>> > - // previous result if we
>> > - // - have not hit the optimatization (and thus returned UINT_MAX)
>> > OR
>> > - // - are now computing for a smaller or equal StopAt.
>> > - unsigned SavedResult = I->second.first;
>> > - unsigned SavedStopAt = I->second.second;
>> > - if (SavedResult != UINT_MAX)
>> > - return SavedResult + CurrentPenalty;
>> > - else if (StopAt <= SavedStopAt)
>> > - return UINT_MAX;
>> > - }
>> > -
>> > - unsigned NoBreak = calcPenalty(State, false, StopAt);
>> > - unsigned WithBreak = calcPenalty(State, true, std::min(StopAt,
>> > NoBreak));
>> > - unsigned Result = std::min(NoBreak, WithBreak);
>> > -
>> > - // We have to store 'Result' without adding 'CurrentPenalty' as the
>> > latter
>> > - // can depend on 'NewLine'.
>> > - Memory[State] = std::pair<unsigned, unsigned>(Result, StopAt);
>> >
>> > - return Result == UINT_MAX ? UINT_MAX : Result + CurrentPenalty;
>> > + /// \brief Returns \c true, if a line break after \p State is
>> > mandatory.
>> > + bool mustBreak(const LineState &State) {
>> > + if (State.NextToken->MustBreakBefore)
>> > + return true;
>> > + if (State.NextToken->is(tok::r_brace) &&
>> > + State.Stack.back().BreakBeforeClosingBrace)
>> > + return true;
>> > + if (State.NextToken->Parent->is(tok::semi) &&
>> > + State.LineContainsContinuedForLoopSection)
>> > + return true;
>> > + if (State.NextToken->Parent->is(tok::comma) &&
>> > + State.Stack.back().BreakAfterComma &&
>> > + State.NextToken->Type != TT_LineComment)
>> > + return true;
>> > + if ((State.NextToken->Type == TT_CtorInitializerColon ||
>> > + (State.NextToken->Parent->ClosesTemplateDeclaration &&
>> > + State.Stack.size() == 1)))
>> > + return true;
>> > + return false;
>> > }
>> >
>> > FormatStyle Style;
>> > @@ -694,10 +703,6 @@ private:
>> > const AnnotatedToken &RootToken;
>> > WhitespaceManager &Whitespaces;
>> >
>> > - // A map from an indent state to a pair (Result, Used-StopAt).
>> > - typedef std::map<LineState, std::pair<unsigned, unsigned> > StateMap;
>> > - StateMap Memory;
>> > -
>> > OptimizationParameters Parameters;
>> > };
>> >
>> >
>> >
>> > _______________________________________________
>> > cfe-commits mailing list
>> > cfe-commits at cs.uiuc.edu
>> > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>>
>>
>>
>> --
>> Timur Iskhodzhanov,
>> Google Russia
>
>
--
Timur Iskhodzhanov,
Google Russia
More information about the cfe-commits
mailing list