r179379 - Revamps structural error detection / handling.

Sun May 26 23:05:10 PDT 2013

On Sun, May 26, 2013 at 7:45 AM, Nico Weber <thakis at chromium.org> wrote:

> On Fri, Apr 12, 2013 at 7:13 AM, Manuel Klimek <klimek at google.com> wrote:
>
>> Author: klimek
>> Date: Fri Apr 12 09:13:36 2013
>> New Revision: 179379
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=179379&view=rev
>> Log:
>> Revamps structural error detection / handling.
>>
>> Previously we'd only detect structural errors on the very first level.
>> This leads to incorrectly balanced braces not being discovered, and thus
>> incorrect indentation.
>>
>> This change fixes the problem by:
>> - changing the parser to use an error state that can be detected
>>   anywhere inside the productions, for example if we get an eof on
>>   SOME_MACRO({ some block <eof>
>> - previously we'd never break lines when we discovered a structural
>>   error; now we break even in the case of a structural error if there
>>   are two unwrapped lines within the same line; thus,
>>   void f() { while (true) { g(); y(); } }
>>   will still be re-formatted, even if there's missing braces somewhere
>>   in the file
>> - still exclude macro definitions from generating structural error;
>>   macro definitions are inbalanced snippets
>>
>> Modified:
>>     cfe/trunk/lib/Format/Format.cpp
>>     cfe/trunk/lib/Format/UnwrappedLineParser.cpp
>>     cfe/trunk/lib/Format/UnwrappedLineParser.h
>>     cfe/trunk/unittests/Format/FormatTest.cpp
>>
>> Modified: cfe/trunk/lib/Format/Format.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=179379&r1=179378&r2=179379&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/lib/Format/Format.cpp (original)
>> +++ cfe/trunk/lib/Format/Format.cpp Fri Apr 12 09:13:36 2013
>> @@ -459,7 +459,7 @@ public:
>>    UnwrappedLineFormatter(const FormatStyle &Style, SourceManager
>> &SourceMgr,
>>                           const AnnotatedLine &Line, unsigned FirstIndent,
>>                           const AnnotatedToken &RootToken,
>> -                         WhitespaceManager &Whitespaces, bool
>> StructuralError)
>> +                         WhitespaceManager &Whitespaces)
>>        : Style(Style), SourceMgr(SourceMgr), Line(Line),
>>          FirstIndent(FirstIndent), RootToken(RootToken),
>>          Whitespaces(Whitespaces), Count(0) {}
>> @@ -1381,7 +1381,7 @@ public:
>>    tooling::Replacements format() {
>>      LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
>>      UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
>> -    StructuralError = Parser.parse();
>> +    bool StructuralError = Parser.parse();
>>      unsigned PreviousEndOfLineColumn = 0;
>>      TokenAnnotator Annotator(Style, SourceMgr, Lex,
>>                               Tokens.getIdentTable().get("in"));
>> @@ -1431,17 +1431,19 @@ public:
>>          unsigned Indent = LevelIndent;
>>          if (static_cast<int>(Indent) + Offset >= 0)
>>            Indent += Offset;
>> -        if (!FirstTok.WhiteSpaceStart.isValid() || StructuralError) {
>> -          Indent = LevelIndent =
>> -
>>  SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
>> -        } else {
>> +        if (FirstTok.WhiteSpaceStart.isValid() &&
>> +            // Insert a break even if there is a structural error in
>> case where
>> +            // we break apart a line consisting of multiple unwrapped
>> lines.
>> +            (FirstTok.NewlinesBefore == 0 || !StructuralError)) {
>>            formatFirstToken(TheLine.First, PreviousLineLastToken, Indent,
>>                             TheLine.InPPDirective,
>> PreviousEndOfLineColumn);
>> +        } else {
>> +          Indent = LevelIndent =
>> +
>>  SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
>>          }
>>          tryFitMultipleLinesInOne(Indent, I, E);
>>          UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine,
>> Indent,
>> -                                         TheLine.First, Whitespaces,
>> -                                         StructuralError);
>> +                                         TheLine.First, Whitespaces);
>>          PreviousEndOfLineColumn =
>>              Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
>>          IndentForLevel[TheLine.Level] = LevelIndent;
>> @@ -1742,7 +1744,6 @@ private:
>>    WhitespaceManager Whitespaces;
>>    std::vector<CharSourceRange> Ranges;
>>    std::vector<AnnotatedLine> AnnotatedLines;
>> -  bool StructuralError;
>>  };
>>
>>  tooling::Replacements
>>
>> Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=179379&r1=179378&r2=179379&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
>> +++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Fri Apr 12 09:13:36 2013
>> @@ -45,9 +45,11 @@ private:
>>  class ScopedMacroState : public FormatTokenSource {
>>  public:
>>    ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
>> -                   FormatToken &ResetToken)
>> +                   FormatToken &ResetToken, bool &StructuralError)
>>        : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
>> -        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
>> +        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
>> +        StructuralError(StructuralError),
>> +        PreviousStructuralError(StructuralError) {
>>      TokenSource = this;
>>      Line.Level = 0;
>>      Line.InPPDirective = true;
>> @@ -58,6 +60,7 @@ public:
>>      ResetToken = Token;
>>      Line.InPPDirective = false;
>>      Line.Level = PreviousLineLevel;
>> +    StructuralError = PreviousStructuralError;
>>    }
>>
>>    virtual FormatToken getNextToken() {
>> @@ -85,6 +88,8 @@ private:
>>    FormatToken &ResetToken;
>>    unsigned PreviousLineLevel;
>>    FormatTokenSource *PreviousTokenSource;
>> +  bool &StructuralError;
>> +  bool PreviousStructuralError;
>>
>>    FormatToken Token;
>>  };
>> @@ -124,13 +129,13 @@ UnwrappedLineParser::UnwrappedLineParser
>>      clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
>>      FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback)
>>      : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
>> -      CurrentLines(&Lines), Diag(Diag), Style(Style), Tokens(&Tokens),
>> -      Callback(Callback) {}
>> +      CurrentLines(&Lines), StructuralError(false), Diag(Diag),
>> Style(Style),
>> +      Tokens(&Tokens), Callback(Callback) {}
>>
>>  bool UnwrappedLineParser::parse() {
>>    DEBUG(llvm::dbgs() << "----\n");
>>    readToken();
>> -  bool Error = parseFile();
>> +  parseFile();
>>    for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E =
>> Lines.end();
>>         I != E; ++I) {
>>      Callback.consumeUnwrappedLine(*I);
>> @@ -139,23 +144,20 @@ bool UnwrappedLineParser::parse() {
>>    // Create line with eof token.
>>    pushToken(FormatTok);
>>    Callback.consumeUnwrappedLine(*Line);
>> -
>> -  return Error;
>> +  return StructuralError;
>>  }
>>
>> -bool UnwrappedLineParser::parseFile() {
>> +void UnwrappedLineParser::parseFile() {
>>    ScopedDeclarationState DeclarationState(
>>        *Line, DeclarationScopeStack,
>>        /*MustBeDeclaration=*/ !Line->InPPDirective);
>> -  bool Error = parseLevel(/*HasOpeningBrace=*/ false);
>> +  parseLevel(/*HasOpeningBrace=*/ false);
>>    // Make sure to format the remaining tokens.
>>    flushComments(true);
>>    addUnwrappedLine();
>> -  return Error;
>>  }
>>
>> -bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
>> -  bool Error = false;
>> +void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
>>    do {
>>      switch (FormatTok.Tok.getKind()) {
>>      case tok::comment:
>> @@ -165,30 +167,27 @@ bool UnwrappedLineParser::parseLevel(boo
>>      case tok::l_brace:
>>        // FIXME: Add parameter whether this can happen - if this happens,
>> we must
>>        // be in a non-declaration context.
>> -      Error |= parseBlock(/*MustBeDeclaration=*/ false);
>> +      parseBlock(/*MustBeDeclaration=*/ false);
>>        addUnwrappedLine();
>>        break;
>>      case tok::r_brace:
>> -      if (HasOpeningBrace) {
>> -        return false;
>> -      } else {
>> -        Diag.Report(FormatTok.Tok.getLocation(),
>> -                    Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
>> -                                         "unexpected '}'"));
>> -        Error = true;
>> -        nextToken();
>> -        addUnwrappedLine();
>> -      }
>> +      if (HasOpeningBrace)
>> +        return;
>> +      Diag.Report(FormatTok.Tok.getLocation(),
>> +                  Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
>> +                                       "unexpected '}'"));
>> +      StructuralError = true;
>> +      nextToken();
>> +      addUnwrappedLine();
>>        break;
>>      default:
>>        parseStructuralElement();
>>        break;
>>      }
>>    } while (!eof());
>> -  return Error;
>>  }
>>
>> -bool UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
>> +void UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
>>                                       unsigned AddLevels) {
>>    assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
>>    nextToken();
>> @@ -202,17 +201,17 @@ bool UnwrappedLineParser::parseBlock(boo
>>
>>    if (!FormatTok.Tok.is(tok::r_brace)) {
>>      Line->Level -= AddLevels;
>> -    return true;
>> +    StructuralError = true;
>> +    return;
>>    }
>>
>>    nextToken(); // Munch the closing brace.
>>    Line->Level -= AddLevels;
>> -  return false;
>>  }
>>
>>  void UnwrappedLineParser::parsePPDirective() {
>>    assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
>> -  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
>> +  ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
>>    nextToken();
>>
>>    if (FormatTok.Tok.getIdentifierInfo() == NULL) {
>>
>> Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=179379&r1=179378&r2=179379&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
>> +++ cfe/trunk/lib/Format/UnwrappedLineParser.h Fri Apr 12 09:13:36 2013
>> @@ -125,9 +125,9 @@ public:
>>    bool parse();
>>
>>  private:
>> -  bool parseFile();
>> -  bool parseLevel(bool HasOpeningBrace);
>> -  bool parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
>> +  void parseFile();
>> +  void parseLevel(bool HasOpeningBrace);
>> +  void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
>>    void parsePPDirective();
>>    void parsePPDefine();
>>    void parsePPUnknown();
>> @@ -187,6 +187,10 @@ private:
>>    // whether we are in a compound statement or not.
>>    std::vector<bool> DeclarationScopeStack;
>>
>> +  // Will be true if we encounter an error that leads to possibily
>> incorrect
>> +  // indentation levels.
>> +  bool StructuralError;
>> +
>>    clang::DiagnosticsEngine &Diag;
>>    const FormatStyle &Style;
>>    FormatTokenSource *Tokens;
>>
>> Modified: cfe/trunk/unittests/Format/FormatTest.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=179379&r1=179378&r2=179379&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/unittests/Format/FormatTest.cpp (original)
>> +++ cfe/trunk/unittests/Format/FormatTest.cpp Fri Apr 12 09:13:36 2013
>> @@ -430,6 +430,7 @@ TEST_F(FormatTest, FormatsSwitchStatemen
>>    verifyFormat("switch (x) {\n"
>>                 "default: {\n"
>>                 "  // Do nothing.\n"
>> +               "}\n"
>>                 "}");
>>    verifyFormat("switch (x) {\n"
>>                 "// comment\n"
>> @@ -3563,8 +3564,18 @@ TEST_F(FormatTest, ObjCLiterals) {
>>    verifyFormat("@{ @\"one\" : @1 }");
>>    verifyFormat("return @{ @\"one\" : @1 };");
>>    verifyFormat("@{ @\"one\" : @1, }");
>> -  verifyFormat("@{ @\"one\" : @{ @2 : @1 } }");
>> -  verifyFormat("@{ @\"one\" : @{ @2 : @1 }, }");
>> +
>> +  // FIXME: Breaking in cases where we think there's a structural error
>> +  // showed that we're incorrectly parsing this code. We need to fix the
>> +  // parsing here.
>>
>
> Any progress on this? This regressed working functionality.
>

Unfortunately I have no idea about Obj-C and a short search didn't get me a
BNS that explained what can be done here... One question is: do we really
need to special case every identifier with an "@" in front of it, or is
there a mode where "@{" for example gets output as a single token (or is
there a fundamental concept for @ in the language that I need to understand
first?)

Thx,
/Manuel

>
>
>> +  verifyFormat("@{ @\"one\" : @\n"
>> +               "{ @2 : @1 }\n"
>> +               "}");
>> +  verifyFormat("@{ @\"one\" : @\n"
>> +               "{ @2 : @1 }\n"
>> +               ",\n"
>> +               "}");
>> +
>>    verifyFormat("@{ 1 > 2 ? @\"one\" : @\"two\" : 1 > 2 ? @1 : @2 }");
>>    verifyFormat("[self setDict:@{}");
>>    verifyFormat("[self setDict:@{ @1 : @2 }");
>>
>>
>> _______________________________________________
>> cfe-commits mailing list
>> cfe-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130527/0036c12b/attachment.html>