r179379 - Revamps structural error detection / handling.

Tue May 28 11:51:26 PDT 2013

This might work again with r182788. Please give it a try.

On Mon, May 27, 2013 at 8:05 AM, Manuel Klimek <klimek at google.com> wrote:

> On Sun, May 26, 2013 at 7:45 AM, Nico Weber <thakis at chromium.org> wrote:
>
>> On Fri, Apr 12, 2013 at 7:13 AM, Manuel Klimek <klimek at google.com> wrote:
>>
>>> Author: klimek
>>> Date: Fri Apr 12 09:13:36 2013
>>> New Revision: 179379
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=179379&view=rev
>>> Log:
>>> Revamps structural error detection / handling.
>>>
>>> Previously we'd only detect structural errors on the very first level.
>>> This leads to incorrectly balanced braces not being discovered, and thus
>>> incorrect indentation.
>>>
>>> This change fixes the problem by:
>>> - changing the parser to use an error state that can be detected
>>>   anywhere inside the productions, for example if we get an eof on
>>>   SOME_MACRO({ some block <eof>
>>> - previously we'd never break lines when we discovered a structural
>>>   error; now we break even in the case of a structural error if there
>>>   are two unwrapped lines within the same line; thus,
>>>   void f() { while (true) { g(); y(); } }
>>>   will still be re-formatted, even if there's missing braces somewhere
>>>   in the file
>>> - still exclude macro definitions from generating structural error;
>>>   macro definitions are inbalanced snippets
>>>
>>> Modified:
>>>     cfe/trunk/lib/Format/Format.cpp
>>>     cfe/trunk/lib/Format/UnwrappedLineParser.cpp
>>>     cfe/trunk/lib/Format/UnwrappedLineParser.h
>>>     cfe/trunk/unittests/Format/FormatTest.cpp
>>>
>>> Modified: cfe/trunk/lib/Format/Format.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=179379&r1=179378&r2=179379&view=diff
>>>
>>> ==============================================================================
>>> --- cfe/trunk/lib/Format/Format.cpp (original)
>>> +++ cfe/trunk/lib/Format/Format.cpp Fri Apr 12 09:13:36 2013
>>> @@ -459,7 +459,7 @@ public:
>>>    UnwrappedLineFormatter(const FormatStyle &Style, SourceManager
>>> &SourceMgr,
>>>                           const AnnotatedLine &Line, unsigned
>>> FirstIndent,
>>>                           const AnnotatedToken &RootToken,
>>> -                         WhitespaceManager &Whitespaces, bool
>>> StructuralError)
>>> +                         WhitespaceManager &Whitespaces)
>>>        : Style(Style), SourceMgr(SourceMgr), Line(Line),
>>>          FirstIndent(FirstIndent), RootToken(RootToken),
>>>          Whitespaces(Whitespaces), Count(0) {}
>>> @@ -1381,7 +1381,7 @@ public:
>>>    tooling::Replacements format() {
>>>      LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
>>>      UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
>>> -    StructuralError = Parser.parse();
>>> +    bool StructuralError = Parser.parse();
>>>      unsigned PreviousEndOfLineColumn = 0;
>>>      TokenAnnotator Annotator(Style, SourceMgr, Lex,
>>>                               Tokens.getIdentTable().get("in"));
>>> @@ -1431,17 +1431,19 @@ public:
>>>          unsigned Indent = LevelIndent;
>>>          if (static_cast<int>(Indent) + Offset >= 0)
>>>            Indent += Offset;
>>> -        if (!FirstTok.WhiteSpaceStart.isValid() || StructuralError) {
>>> -          Indent = LevelIndent =
>>> -
>>>  SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
>>> -        } else {
>>> +        if (FirstTok.WhiteSpaceStart.isValid() &&
>>> +            // Insert a break even if there is a structural error in
>>> case where
>>> +            // we break apart a line consisting of multiple unwrapped
>>> lines.
>>> +            (FirstTok.NewlinesBefore == 0 || !StructuralError)) {
>>>            formatFirstToken(TheLine.First, PreviousLineLastToken, Indent,
>>>                             TheLine.InPPDirective,
>>> PreviousEndOfLineColumn);
>>> +        } else {
>>> +          Indent = LevelIndent =
>>> +
>>>  SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
>>>          }
>>>          tryFitMultipleLinesInOne(Indent, I, E);
>>>          UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine,
>>> Indent,
>>> -                                         TheLine.First, Whitespaces,
>>> -                                         StructuralError);
>>> +                                         TheLine.First, Whitespaces);
>>>          PreviousEndOfLineColumn =
>>>              Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
>>>          IndentForLevel[TheLine.Level] = LevelIndent;
>>> @@ -1742,7 +1744,6 @@ private:
>>>    WhitespaceManager Whitespaces;
>>>    std::vector<CharSourceRange> Ranges;
>>>    std::vector<AnnotatedLine> AnnotatedLines;
>>> -  bool StructuralError;
>>>  };
>>>
>>>  tooling::Replacements
>>>
>>> Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=179379&r1=179378&r2=179379&view=diff
>>>
>>> ==============================================================================
>>> --- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
>>> +++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Fri Apr 12 09:13:36 2013
>>> @@ -45,9 +45,11 @@ private:
>>>  class ScopedMacroState : public FormatTokenSource {
>>>  public:
>>>    ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
>>> -                   FormatToken &ResetToken)
>>> +                   FormatToken &ResetToken, bool &StructuralError)
>>>        : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
>>> -        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource)
>>> {
>>> +        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
>>> +        StructuralError(StructuralError),
>>> +        PreviousStructuralError(StructuralError) {
>>>      TokenSource = this;
>>>      Line.Level = 0;
>>>      Line.InPPDirective = true;
>>> @@ -58,6 +60,7 @@ public:
>>>      ResetToken = Token;
>>>      Line.InPPDirective = false;
>>>      Line.Level = PreviousLineLevel;
>>> +    StructuralError = PreviousStructuralError;
>>>    }
>>>
>>>    virtual FormatToken getNextToken() {
>>> @@ -85,6 +88,8 @@ private:
>>>    FormatToken &ResetToken;
>>>    unsigned PreviousLineLevel;
>>>    FormatTokenSource *PreviousTokenSource;
>>> +  bool &StructuralError;
>>> +  bool PreviousStructuralError;
>>>
>>>    FormatToken Token;
>>>  };
>>> @@ -124,13 +129,13 @@ UnwrappedLineParser::UnwrappedLineParser
>>>      clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
>>>      FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback)
>>>      : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
>>> -      CurrentLines(&Lines), Diag(Diag), Style(Style), Tokens(&Tokens),
>>> -      Callback(Callback) {}
>>> +      CurrentLines(&Lines), StructuralError(false), Diag(Diag),
>>> Style(Style),
>>> +      Tokens(&Tokens), Callback(Callback) {}
>>>
>>>  bool UnwrappedLineParser::parse() {
>>>    DEBUG(llvm::dbgs() << "----\n");
>>>    readToken();
>>> -  bool Error = parseFile();
>>> +  parseFile();
>>>    for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E =
>>> Lines.end();
>>>         I != E; ++I) {
>>>      Callback.consumeUnwrappedLine(*I);
>>> @@ -139,23 +144,20 @@ bool UnwrappedLineParser::parse() {
>>>    // Create line with eof token.
>>>    pushToken(FormatTok);
>>>    Callback.consumeUnwrappedLine(*Line);
>>> -
>>> -  return Error;
>>> +  return StructuralError;
>>>  }
>>>
>>> -bool UnwrappedLineParser::parseFile() {
>>> +void UnwrappedLineParser::parseFile() {
>>>    ScopedDeclarationState DeclarationState(
>>>        *Line, DeclarationScopeStack,
>>>        /*MustBeDeclaration=*/ !Line->InPPDirective);
>>> -  bool Error = parseLevel(/*HasOpeningBrace=*/ false);
>>> +  parseLevel(/*HasOpeningBrace=*/ false);
>>>    // Make sure to format the remaining tokens.
>>>    flushComments(true);
>>>    addUnwrappedLine();
>>> -  return Error;
>>>  }
>>>
>>> -bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
>>> -  bool Error = false;
>>> +void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
>>>    do {
>>>      switch (FormatTok.Tok.getKind()) {
>>>      case tok::comment:
>>> @@ -165,30 +167,27 @@ bool UnwrappedLineParser::parseLevel(boo
>>>      case tok::l_brace:
>>>        // FIXME: Add parameter whether this can happen - if this
>>> happens, we must
>>>        // be in a non-declaration context.
>>> -      Error |= parseBlock(/*MustBeDeclaration=*/ false);
>>> +      parseBlock(/*MustBeDeclaration=*/ false);
>>>        addUnwrappedLine();
>>>        break;
>>>      case tok::r_brace:
>>> -      if (HasOpeningBrace) {
>>> -        return false;
>>> -      } else {
>>> -        Diag.Report(FormatTok.Tok.getLocation(),
>>> -
>>>  Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
>>> -                                         "unexpected '}'"));
>>> -        Error = true;
>>> -        nextToken();
>>> -        addUnwrappedLine();
>>> -      }
>>> +      if (HasOpeningBrace)
>>> +        return;
>>> +      Diag.Report(FormatTok.Tok.getLocation(),
>>> +                  Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
>>> +                                       "unexpected '}'"));
>>> +      StructuralError = true;
>>> +      nextToken();
>>> +      addUnwrappedLine();
>>>        break;
>>>      default:
>>>        parseStructuralElement();
>>>        break;
>>>      }
>>>    } while (!eof());
>>> -  return Error;
>>>  }
>>>
>>> -bool UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
>>> +void UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
>>>                                       unsigned AddLevels) {
>>>    assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
>>>    nextToken();
>>> @@ -202,17 +201,17 @@ bool UnwrappedLineParser::parseBlock(boo
>>>
>>>    if (!FormatTok.Tok.is(tok::r_brace)) {
>>>      Line->Level -= AddLevels;
>>> -    return true;
>>> +    StructuralError = true;
>>> +    return;
>>>    }
>>>
>>>    nextToken(); // Munch the closing brace.
>>>    Line->Level -= AddLevels;
>>> -  return false;
>>>  }
>>>
>>>  void UnwrappedLineParser::parsePPDirective() {
>>>    assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
>>> -  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
>>> +  ScopedMacroState MacroState(*Line, Tokens, FormatTok,
>>> StructuralError);
>>>    nextToken();
>>>
>>>    if (FormatTok.Tok.getIdentifierInfo() == NULL) {
>>>
>>> Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=179379&r1=179378&r2=179379&view=diff
>>>
>>> ==============================================================================
>>> --- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
>>> +++ cfe/trunk/lib/Format/UnwrappedLineParser.h Fri Apr 12 09:13:36 2013
>>> @@ -125,9 +125,9 @@ public:
>>>    bool parse();
>>>
>>>  private:
>>> -  bool parseFile();
>>> -  bool parseLevel(bool HasOpeningBrace);
>>> -  bool parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
>>> +  void parseFile();
>>> +  void parseLevel(bool HasOpeningBrace);
>>> +  void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
>>>    void parsePPDirective();
>>>    void parsePPDefine();
>>>    void parsePPUnknown();
>>> @@ -187,6 +187,10 @@ private:
>>>    // whether we are in a compound statement or not.
>>>    std::vector<bool> DeclarationScopeStack;
>>>
>>> +  // Will be true if we encounter an error that leads to possibily
>>> incorrect
>>> +  // indentation levels.
>>> +  bool StructuralError;
>>> +
>>>    clang::DiagnosticsEngine &Diag;
>>>    const FormatStyle &Style;
>>>    FormatTokenSource *Tokens;
>>>
>>> Modified: cfe/trunk/unittests/Format/FormatTest.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=179379&r1=179378&r2=179379&view=diff
>>>
>>> ==============================================================================
>>> --- cfe/trunk/unittests/Format/FormatTest.cpp (original)
>>> +++ cfe/trunk/unittests/Format/FormatTest.cpp Fri Apr 12 09:13:36 2013
>>> @@ -430,6 +430,7 @@ TEST_F(FormatTest, FormatsSwitchStatemen
>>>    verifyFormat("switch (x) {\n"
>>>                 "default: {\n"
>>>                 "  // Do nothing.\n"
>>> +               "}\n"
>>>                 "}");
>>>    verifyFormat("switch (x) {\n"
>>>                 "// comment\n"
>>> @@ -3563,8 +3564,18 @@ TEST_F(FormatTest, ObjCLiterals) {
>>>    verifyFormat("@{ @\"one\" : @1 }");
>>>    verifyFormat("return @{ @\"one\" : @1 };");
>>>    verifyFormat("@{ @\"one\" : @1, }");
>>> -  verifyFormat("@{ @\"one\" : @{ @2 : @1 } }");
>>> -  verifyFormat("@{ @\"one\" : @{ @2 : @1 }, }");
>>> +
>>> +  // FIXME: Breaking in cases where we think there's a structural error
>>> +  // showed that we're incorrectly parsing this code. We need to fix the
>>> +  // parsing here.
>>>
>>
>> Any progress on this? This regressed working functionality.
>>
>
> Unfortunately I have no idea about Obj-C and a short search didn't get me
> a BNS that explained what can be done here... One question is: do we really
> need to special case every identifier with an "@" in front of it, or is
> there a mode where "@{" for example gets output as a single token (or is
> there a fundamental concept for @ in the language that I need to understand
> first?)
>
> Thx,
> /Manuel
>
>
>>
>>
>>> +  verifyFormat("@{ @\"one\" : @\n"
>>> +               "{ @2 : @1 }\n"
>>> +               "}");
>>> +  verifyFormat("@{ @\"one\" : @\n"
>>> +               "{ @2 : @1 }\n"
>>> +               ",\n"
>>> +               "}");
>>> +
>>>    verifyFormat("@{ 1 > 2 ? @\"one\" : @\"two\" : 1 > 2 ? @1 : @2 }");
>>>    verifyFormat("[self setDict:@{}");
>>>    verifyFormat("[self setDict:@{ @1 : @2 }");
>>>
>>>
>>> _______________________________________________
>>> cfe-commits mailing list
>>> cfe-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>>>
>>
>>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130528/da060d4b/attachment.html>