r179379 - Revamps structural error detection / handling.

Nico Weber thakis at chromium.org
Sat May 25 22:45:36 PDT 2013


On Fri, Apr 12, 2013 at 7:13 AM, Manuel Klimek <klimek at google.com> wrote:

> Author: klimek
> Date: Fri Apr 12 09:13:36 2013
> New Revision: 179379
>
> URL: http://llvm.org/viewvc/llvm-project?rev=179379&view=rev
> Log:
> Revamps structural error detection / handling.
>
> Previously we'd only detect structural errors on the very first level.
> This leads to incorrectly balanced braces not being discovered, and thus
> incorrect indentation.
>
> This change fixes the problem by:
> - changing the parser to use an error state that can be detected
>   anywhere inside the productions, for example if we get an eof on
>   SOME_MACRO({ some block <eof>
> - previously we'd never break lines when we discovered a structural
>   error; now we break even in the case of a structural error if there
>   are two unwrapped lines within the same line; thus,
>   void f() { while (true) { g(); y(); } }
>   will still be re-formatted, even if there's missing braces somewhere
>   in the file
> - still exclude macro definitions from generating structural error;
>   macro definitions are inbalanced snippets
>
> Modified:
>     cfe/trunk/lib/Format/Format.cpp
>     cfe/trunk/lib/Format/UnwrappedLineParser.cpp
>     cfe/trunk/lib/Format/UnwrappedLineParser.h
>     cfe/trunk/unittests/Format/FormatTest.cpp
>
> Modified: cfe/trunk/lib/Format/Format.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=179379&r1=179378&r2=179379&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/Format/Format.cpp (original)
> +++ cfe/trunk/lib/Format/Format.cpp Fri Apr 12 09:13:36 2013
> @@ -459,7 +459,7 @@ public:
>    UnwrappedLineFormatter(const FormatStyle &Style, SourceManager
> &SourceMgr,
>                           const AnnotatedLine &Line, unsigned FirstIndent,
>                           const AnnotatedToken &RootToken,
> -                         WhitespaceManager &Whitespaces, bool
> StructuralError)
> +                         WhitespaceManager &Whitespaces)
>        : Style(Style), SourceMgr(SourceMgr), Line(Line),
>          FirstIndent(FirstIndent), RootToken(RootToken),
>          Whitespaces(Whitespaces), Count(0) {}
> @@ -1381,7 +1381,7 @@ public:
>    tooling::Replacements format() {
>      LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
>      UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
> -    StructuralError = Parser.parse();
> +    bool StructuralError = Parser.parse();
>      unsigned PreviousEndOfLineColumn = 0;
>      TokenAnnotator Annotator(Style, SourceMgr, Lex,
>                               Tokens.getIdentTable().get("in"));
> @@ -1431,17 +1431,19 @@ public:
>          unsigned Indent = LevelIndent;
>          if (static_cast<int>(Indent) + Offset >= 0)
>            Indent += Offset;
> -        if (!FirstTok.WhiteSpaceStart.isValid() || StructuralError) {
> -          Indent = LevelIndent =
> -
>  SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
> -        } else {
> +        if (FirstTok.WhiteSpaceStart.isValid() &&
> +            // Insert a break even if there is a structural error in case
> where
> +            // we break apart a line consisting of multiple unwrapped
> lines.
> +            (FirstTok.NewlinesBefore == 0 || !StructuralError)) {
>            formatFirstToken(TheLine.First, PreviousLineLastToken, Indent,
>                             TheLine.InPPDirective,
> PreviousEndOfLineColumn);
> +        } else {
> +          Indent = LevelIndent =
> +
>  SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
>          }
>          tryFitMultipleLinesInOne(Indent, I, E);
>          UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine,
> Indent,
> -                                         TheLine.First, Whitespaces,
> -                                         StructuralError);
> +                                         TheLine.First, Whitespaces);
>          PreviousEndOfLineColumn =
>              Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
>          IndentForLevel[TheLine.Level] = LevelIndent;
> @@ -1742,7 +1744,6 @@ private:
>    WhitespaceManager Whitespaces;
>    std::vector<CharSourceRange> Ranges;
>    std::vector<AnnotatedLine> AnnotatedLines;
> -  bool StructuralError;
>  };
>
>  tooling::Replacements
>
> Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=179379&r1=179378&r2=179379&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
> +++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Fri Apr 12 09:13:36 2013
> @@ -45,9 +45,11 @@ private:
>  class ScopedMacroState : public FormatTokenSource {
>  public:
>    ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
> -                   FormatToken &ResetToken)
> +                   FormatToken &ResetToken, bool &StructuralError)
>        : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
> -        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
> +        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
> +        StructuralError(StructuralError),
> +        PreviousStructuralError(StructuralError) {
>      TokenSource = this;
>      Line.Level = 0;
>      Line.InPPDirective = true;
> @@ -58,6 +60,7 @@ public:
>      ResetToken = Token;
>      Line.InPPDirective = false;
>      Line.Level = PreviousLineLevel;
> +    StructuralError = PreviousStructuralError;
>    }
>
>    virtual FormatToken getNextToken() {
> @@ -85,6 +88,8 @@ private:
>    FormatToken &ResetToken;
>    unsigned PreviousLineLevel;
>    FormatTokenSource *PreviousTokenSource;
> +  bool &StructuralError;
> +  bool PreviousStructuralError;
>
>    FormatToken Token;
>  };
> @@ -124,13 +129,13 @@ UnwrappedLineParser::UnwrappedLineParser
>      clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
>      FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback)
>      : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
> -      CurrentLines(&Lines), Diag(Diag), Style(Style), Tokens(&Tokens),
> -      Callback(Callback) {}
> +      CurrentLines(&Lines), StructuralError(false), Diag(Diag),
> Style(Style),
> +      Tokens(&Tokens), Callback(Callback) {}
>
>  bool UnwrappedLineParser::parse() {
>    DEBUG(llvm::dbgs() << "----\n");
>    readToken();
> -  bool Error = parseFile();
> +  parseFile();
>    for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E =
> Lines.end();
>         I != E; ++I) {
>      Callback.consumeUnwrappedLine(*I);
> @@ -139,23 +144,20 @@ bool UnwrappedLineParser::parse() {
>    // Create line with eof token.
>    pushToken(FormatTok);
>    Callback.consumeUnwrappedLine(*Line);
> -
> -  return Error;
> +  return StructuralError;
>  }
>
> -bool UnwrappedLineParser::parseFile() {
> +void UnwrappedLineParser::parseFile() {
>    ScopedDeclarationState DeclarationState(
>        *Line, DeclarationScopeStack,
>        /*MustBeDeclaration=*/ !Line->InPPDirective);
> -  bool Error = parseLevel(/*HasOpeningBrace=*/ false);
> +  parseLevel(/*HasOpeningBrace=*/ false);
>    // Make sure to format the remaining tokens.
>    flushComments(true);
>    addUnwrappedLine();
> -  return Error;
>  }
>
> -bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
> -  bool Error = false;
> +void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
>    do {
>      switch (FormatTok.Tok.getKind()) {
>      case tok::comment:
> @@ -165,30 +167,27 @@ bool UnwrappedLineParser::parseLevel(boo
>      case tok::l_brace:
>        // FIXME: Add parameter whether this can happen - if this happens,
> we must
>        // be in a non-declaration context.
> -      Error |= parseBlock(/*MustBeDeclaration=*/ false);
> +      parseBlock(/*MustBeDeclaration=*/ false);
>        addUnwrappedLine();
>        break;
>      case tok::r_brace:
> -      if (HasOpeningBrace) {
> -        return false;
> -      } else {
> -        Diag.Report(FormatTok.Tok.getLocation(),
> -                    Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
> -                                         "unexpected '}'"));
> -        Error = true;
> -        nextToken();
> -        addUnwrappedLine();
> -      }
> +      if (HasOpeningBrace)
> +        return;
> +      Diag.Report(FormatTok.Tok.getLocation(),
> +                  Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
> +                                       "unexpected '}'"));
> +      StructuralError = true;
> +      nextToken();
> +      addUnwrappedLine();
>        break;
>      default:
>        parseStructuralElement();
>        break;
>      }
>    } while (!eof());
> -  return Error;
>  }
>
> -bool UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
> +void UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
>                                       unsigned AddLevels) {
>    assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
>    nextToken();
> @@ -202,17 +201,17 @@ bool UnwrappedLineParser::parseBlock(boo
>
>    if (!FormatTok.Tok.is(tok::r_brace)) {
>      Line->Level -= AddLevels;
> -    return true;
> +    StructuralError = true;
> +    return;
>    }
>
>    nextToken(); // Munch the closing brace.
>    Line->Level -= AddLevels;
> -  return false;
>  }
>
>  void UnwrappedLineParser::parsePPDirective() {
>    assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
> -  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
> +  ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
>    nextToken();
>
>    if (FormatTok.Tok.getIdentifierInfo() == NULL) {
>
> Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=179379&r1=179378&r2=179379&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
> +++ cfe/trunk/lib/Format/UnwrappedLineParser.h Fri Apr 12 09:13:36 2013
> @@ -125,9 +125,9 @@ public:
>    bool parse();
>
>  private:
> -  bool parseFile();
> -  bool parseLevel(bool HasOpeningBrace);
> -  bool parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
> +  void parseFile();
> +  void parseLevel(bool HasOpeningBrace);
> +  void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
>    void parsePPDirective();
>    void parsePPDefine();
>    void parsePPUnknown();
> @@ -187,6 +187,10 @@ private:
>    // whether we are in a compound statement or not.
>    std::vector<bool> DeclarationScopeStack;
>
> +  // Will be true if we encounter an error that leads to possibily
> incorrect
> +  // indentation levels.
> +  bool StructuralError;
> +
>    clang::DiagnosticsEngine &Diag;
>    const FormatStyle &Style;
>    FormatTokenSource *Tokens;
>
> Modified: cfe/trunk/unittests/Format/FormatTest.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=179379&r1=179378&r2=179379&view=diff
>
> ==============================================================================
> --- cfe/trunk/unittests/Format/FormatTest.cpp (original)
> +++ cfe/trunk/unittests/Format/FormatTest.cpp Fri Apr 12 09:13:36 2013
> @@ -430,6 +430,7 @@ TEST_F(FormatTest, FormatsSwitchStatemen
>    verifyFormat("switch (x) {\n"
>                 "default: {\n"
>                 "  // Do nothing.\n"
> +               "}\n"
>                 "}");
>    verifyFormat("switch (x) {\n"
>                 "// comment\n"
> @@ -3563,8 +3564,18 @@ TEST_F(FormatTest, ObjCLiterals) {
>    verifyFormat("@{ @\"one\" : @1 }");
>    verifyFormat("return @{ @\"one\" : @1 };");
>    verifyFormat("@{ @\"one\" : @1, }");
> -  verifyFormat("@{ @\"one\" : @{ @2 : @1 } }");
> -  verifyFormat("@{ @\"one\" : @{ @2 : @1 }, }");
> +
> +  // FIXME: Breaking in cases where we think there's a structural error
> +  // showed that we're incorrectly parsing this code. We need to fix the
> +  // parsing here.
>

Any progress on this? This regressed working functionality.


> +  verifyFormat("@{ @\"one\" : @\n"
> +               "{ @2 : @1 }\n"
> +               "}");
> +  verifyFormat("@{ @\"one\" : @\n"
> +               "{ @2 : @1 }\n"
> +               ",\n"
> +               "}");
> +
>    verifyFormat("@{ 1 > 2 ? @\"one\" : @\"two\" : 1 > 2 ? @1 : @2 }");
>    verifyFormat("[self setDict:@{}");
>    verifyFormat("[self setDict:@{ @1 : @2 }");
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130525/43ca81c1/attachment.html>


More information about the cfe-commits mailing list