r195240 - Fix bug where optimization would lead to strange line breaks.

Wed Nov 20 07:56:59 PST 2013

On Wed, Nov 20, 2013 at 7:39 AM, Manuel Klimek <klimek at google.com> wrote:

> On Wed, Nov 20, 2013 at 4:29 PM, Daniel Jasper <djasper at google.com> wrote:
>
>>
>>
>>
>> On Wed, Nov 20, 2013 at 7:19 AM, Manuel Klimek <klimek at google.com> wrote:
>>
>>> On Wed, Nov 20, 2013 at 4:16 PM, Daniel Jasper <djasper at google.com>wrote:
>>>
>>>>
>>>>
>>>>
>>>> On Wed, Nov 20, 2013 at 7:12 AM, Manuel Klimek <klimek at google.com>wrote:
>>>>
>>>>> On Wed, Nov 20, 2013 at 4:05 PM, Daniel Jasper <djasper at google.com>wrote:
>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>> On Wed, Nov 20, 2013 at 7:04 AM, Manuel Klimek <klimek at google.com>wrote:
>>>>>>
>>>>>>> On Wed, Nov 20, 2013 at 3:58 PM, Daniel Jasper <djasper at google.com>wrote:
>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> On Wed, Nov 20, 2013 at 3:20 AM, Manuel Klimek <klimek at google.com>wrote:
>>>>>>>>
>>>>>>>>> Author: klimek
>>>>>>>>> Date: Wed Nov 20 05:20:32 2013
>>>>>>>>> New Revision: 195240
>>>>>>>>>
>>>>>>>>> URL: http://llvm.org/viewvc/llvm-project?rev=195240&view=rev
>>>>>>>>> Log:
>>>>>>>>> Fix bug where optimization would lead to strange line breaks.
>>>>>>>>>
>>>>>>>>> Before:
>>>>>>>>>   void f() {
>>>>>>>>>     CHECK_EQ(aaaa, (
>>>>>>>>>                        *bbbbbbbbb)->cccccc)
>>>>>>>>>         << "qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq";
>>>>>>>>>   }
>>>>>>>>>
>>>>>>>>> After:
>>>>>>>>>   void f() {
>>>>>>>>>     CHECK_EQ(aaaa, (*bbbbbbbbb)->cccccc)
>>>>>>>>>         << "qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq";
>>>>>>>>>   }
>>>>>>>>>
>>>>>>>>> Modified:
>>>>>>>>>     cfe/trunk/lib/Format/ContinuationIndenter.cpp
>>>>>>>>>     cfe/trunk/lib/Format/ContinuationIndenter.h
>>>>>>>>>     cfe/trunk/unittests/Format/FormatTest.cpp
>>>>>>>>>
>>>>>>>>> Modified: cfe/trunk/lib/Format/ContinuationIndenter.cpp
>>>>>>>>> URL:
>>>>>>>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/ContinuationIndenter.cpp?rev=195240&r1=195239&r2=195240&view=diff
>>>>>>>>>
>>>>>>>>> ==============================================================================
>>>>>>>>> --- cfe/trunk/lib/Format/ContinuationIndenter.cpp (original)
>>>>>>>>> +++ cfe/trunk/lib/Format/ContinuationIndenter.cpp Wed Nov 20
>>>>>>>>> 05:20:32 2013
>>>>>>>>> @@ -224,13 +224,14 @@ unsigned ContinuationIndenter::addTokenT
>>>>>>>>>    if (Newline)
>>>>>>>>>      Penalty = addTokenOnNewLine(State, DryRun);
>>>>>>>>>    else
>>>>>>>>> -    addTokenOnCurrentLine(State, DryRun, ExtraSpaces);
>>>>>>>>> +    Penalty = addTokenOnCurrentLine(State, DryRun, ExtraSpaces);
>>>>>>>>>
>>>>>>>>>    return moveStateToNextToken(State, DryRun, Newline) + Penalty;
>>>>>>>>>  }
>>>>>>>>>
>>>>>>>>> -void ContinuationIndenter::addTokenOnCurrentLine(LineState
>>>>>>>>> &State, bool DryRun,
>>>>>>>>> -                                                 unsigned
>>>>>>>>> ExtraSpaces) {
>>>>>>>>> +unsigned ContinuationIndenter::addTokenOnCurrentLine(LineState
>>>>>>>>> &State,
>>>>>>>>> +                                                     bool DryRun,
>>>>>>>>> +                                                     unsigned
>>>>>>>>> ExtraSpaces) {
>>>>>>>>>    FormatToken &Current = *State.NextToken;
>>>>>>>>>    const FormatToken &Previous = *State.NextToken->Previous;
>>>>>>>>>    if (Current.is(tok::equal) &&
>>>>>>>>> @@ -249,6 +250,15 @@ void ContinuationIndenter::addTokenOnCur
>>>>>>>>>        State.Stack.back().LastSpace =
>>>>>>>>> State.Stack.back().VariablePos;
>>>>>>>>>    }
>>>>>>>>>
>>>>>>>>> +  unsigned Penalty = 0;
>>>>>>>>> +  // A break before a "<<" will get
>>>>>>>>> Style.PenaltyBreakFirstLessLess, so a
>>>>>>>>> +  // continuation with "<<" has a smaller penalty in general.
>>>>>>>>> +  // If the LHS is long, we don't want to penalize the break
>>>>>>>>> though, so we
>>>>>>>>> +  // also add Style.PenaltyBreakFirstLessLess.
>>>>>>>>>
>>>>>>>>
>>>>>>>> The comment is significantly harder to understand than the code
>>>>>>>> itself (I only understood it after I read the code).
>>>>>>>>
>>>>>>>>
>>>>>>>>> +  if (Current.is(tok::lessless) &&
>>>>>>>>> State.Stack.back().FirstLessLess == 0 &&
>>>>>>>>> +      State.Column > Style.ColumnLimit / 2)
>>>>>>>>> +    Penalty += Style.PenaltyBreakFirstLessLess;
>>>>>>>>>
>>>>>>>>
>>>>>>>> I don't like this approach as the logic behind it is very
>>>>>>>> convoluted and now spread over multiple code sites (in addition to
>>>>>>>> addTokenOnNewLine and addTokenOnCurrentLine it also relies on mustBreak for
>>>>>>>> correct behavior). Also adding a PenaltyBREAK... when not actually breaking
>>>>>>>> seems bad to me.
>>>>>>>>
>>>>>>>>
>>>>>>>>> +
>>>>>>>>>    unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces;
>>>>>>>>>
>>>>>>>>>    if (!DryRun)
>>>>>>>>> @@ -307,6 +317,7 @@ void ContinuationIndenter::addTokenOnCur
>>>>>>>>>          State.Stack[State.Stack.size() - 2].CallContinuation == 0)
>>>>>>>>>        State.Stack.back().LastSpace = State.Column;
>>>>>>>>>    }
>>>>>>>>> +  return Penalty;
>>>>>>>>>  }
>>>>>>>>>
>>>>>>>>>  unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
>>>>>>>>> @@ -332,9 +343,8 @@ unsigned ContinuationIndenter::addTokenO
>>>>>>>>>    Penalty += State.NextToken->SplitPenalty;
>>>>>>>>>
>>>>>>>>>    // Breaking before the first "<<" is generally not desirable if
>>>>>>>>> the LHS is
>>>>>>>>> -  // short.
>>>>>>>>> -  if (Current.is(tok::lessless) &&
>>>>>>>>> State.Stack.back().FirstLessLess == 0 &&
>>>>>>>>> -      State.Column <= Style.ColumnLimit / 2)
>>>>>>>>> +  // short (not breaking with a long LHS is penalized in
>>>>>>>>> addTokenOnCurrentLine).
>>>>>>>>> +  if (Current.is(tok::lessless) &&
>>>>>>>>> State.Stack.back().FirstLessLess == 0)
>>>>>>>>>
>>>>>>>>
>>>>>>>> Simply adding the penalty whenever the LHS broken should do almost
>>>>>>>> the same thing. Did that instead in r195253.
>>>>>>>>
>>>>>>>
>>>>>>> I think that solution is worse, as it still doesn't get rid of the
>>>>>>> very subtle coupling between the previous line's indent and the penalty
>>>>>>> where the previous line shouldn't matter (in this case we're breaking,
>>>>>>> after all). It definitely was very confusing to me - it took me forever to
>>>>>>> understand what the current code was doing, and what it was actually trying
>>>>>>> to do.
>>>>>>>
>>>>>>
>>>>>> How is anything relying on an indent?
>>>>>>
>>>>>
>>>>> Are you asking this question because you make the distinction between
>>>>> indent and alignment?
>>>>>
>>>>
>>>> No. I am probably completely not understanding you. All this does is
>>>> look at the current column. If that is past half of the column limit, we
>>>> add a penalty. Indentation/alignment is not involved.
>>>>
>>>
>>> Well, we're breaking, so it's the "previous end of line column".
>>>
>>
>> That has nothing to do with alignment or indentation in this case.
>>
>>
>>> To rephrase, I think the problem really is that we want to give a
>>> negative penalty to "not breaking a first lessless that's in the left half
>>> of the screen".
>>>
>>
>> No, that is not what we want to do. Especially, we do not want to give
>> this negative penalty when the LHS is split over multiple lines. So I'd
>> phrase it as a negative penalty if we can keep the "<<" on the first line
>> and in the left part of the screen (although the latter is more of an
>> exception).
>>
>> My proposed solution does this by giving penalties to the inverse (break
>>> before lessless and continuation of first lessless in the second half of
>>> the screen).
>>>
>>> From reading the current implementation, I don't understand that that's
>>> what's going on, or why looking whether the line was broken before has
>>> anything to do with it. I think that's very subtle.
>>>
>>
>> I think your solution is way more subtle as it depends on a number of
>> other things. For instance it took me some time to convince myself that it
>> would not create something like:
>>
>> Diag(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa,
>>      aaa) << "test";
>>
>> Instead of:
>>
>> Diag(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, aaa)
>>     << "test";
>>
>> Which it doesn't because it relies on behavior in mustBreak().
>>
>
> I actually would rather make that explicit in the addTokenToCurrentLine.
>
> My main problem with the penalty-on-break depending on the previous line's
> length is that I completely didn't expect a line break where the alignment
> doesn't depend on the previous line's length to have a penalty that depends
> on the previous line's length.
>

It is not the previous line length, though. It is the current line length
right before inserting something on the next line :-).

To me it is much more confusing to add a penalty in one code path and then
negating that in another code path by adding the same penalty to all other
solutions. I would give it a high chance that some day we alter one of the
code paths introducing (probably latent) bugs.

For example, if we had other state options than breaks that changed the
> length of the line, it would lead to the same problem we had now.
>

The same is true for your solution just in the opposite way (yours would
try to make the line unnecessarily short).

Anyway, both ways are subtle and we wasted enough time implementing and
> reimplementing it, so I'll stop arguing now...
>

I agree 100%.

>>
>>>
>>>>
>>>>
>>>>>
>>>>>
>>>>>
>>>>>>
>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>>      Penalty += Style.PenaltyBreakFirstLessLess;
>>>>>>>>>
>>>>>>>>>    if (Current.is(tok::l_brace) && Current.BlockKind == BK_Block) {
>>>>>>>>>
>>>>>>>>> Modified: cfe/trunk/lib/Format/ContinuationIndenter.h
>>>>>>>>> URL:
>>>>>>>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/ContinuationIndenter.h?rev=195240&r1=195239&r2=195240&view=diff
>>>>>>>>>
>>>>>>>>> ==============================================================================
>>>>>>>>> --- cfe/trunk/lib/Format/ContinuationIndenter.h (original)
>>>>>>>>> +++ cfe/trunk/lib/Format/ContinuationIndenter.h Wed Nov 20
>>>>>>>>> 05:20:32 2013
>>>>>>>>> @@ -91,8 +91,8 @@ private:
>>>>>>>>>    ///
>>>>>>>>>    /// If \p DryRun is \c false, also creates and stores the
>>>>>>>>> required
>>>>>>>>>    /// \c Replacement.
>>>>>>>>> -  void addTokenOnCurrentLine(LineState &State, bool DryRun,
>>>>>>>>> -                             unsigned ExtraSpaces);
>>>>>>>>> +  unsigned addTokenOnCurrentLine(LineState &State, bool DryRun,
>>>>>>>>> +                                 unsigned ExtraSpaces);
>>>>>>>>>
>>>>>>>>>    /// \brief Appends the next token to \p State and updates
>>>>>>>>> information
>>>>>>>>>    /// necessary for indentation.
>>>>>>>>>
>>>>>>>>> Modified: cfe/trunk/unittests/Format/FormatTest.cpp
>>>>>>>>> URL:
>>>>>>>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=195240&r1=195239&r2=195240&view=diff
>>>>>>>>>
>>>>>>>>> ==============================================================================
>>>>>>>>> --- cfe/trunk/unittests/Format/FormatTest.cpp (original)
>>>>>>>>> +++ cfe/trunk/unittests/Format/FormatTest.cpp Wed Nov 20 05:20:32
>>>>>>>>> 2013
>>>>>>>>> @@ -3744,6 +3744,11 @@ TEST_F(FormatTest, AlignsPipes) {
>>>>>>>>>    EXPECT_EQ("llvm::errs() << \"\n"
>>>>>>>>>              "             << a;",
>>>>>>>>>              format("llvm::errs() << \"\n<<a;"));
>>>>>>>>> +
>>>>>>>>> +  verifyFormat("void f() {\n"
>>>>>>>>> +               "  CHECK_EQ(aaaa, (*bbbbbbbbb)->cccccc)\n"
>>>>>>>>> +               "      <<
>>>>>>>>> \"qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq\";\n"
>>>>>>>>> +               "}");
>>>>>>>>>  }
>>>>>>>>>
>>>>>>>>>  TEST_F(FormatTest, UnderstandsEquals) {
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> _______________________________________________
>>>>>>>>> cfe-commits mailing list
>>>>>>>>> cfe-commits at cs.uiuc.edu
>>>>>>>>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20131120/24ffe7e5/attachment.html>