r179379 - Revamps structural error detection / handling.
Manuel Klimek
klimek at google.com
Sun May 26 23:05:10 PDT 2013
On Sun, May 26, 2013 at 7:45 AM, Nico Weber <thakis at chromium.org> wrote:
> On Fri, Apr 12, 2013 at 7:13 AM, Manuel Klimek <klimek at google.com> wrote:
>
>> Author: klimek
>> Date: Fri Apr 12 09:13:36 2013
>> New Revision: 179379
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=179379&view=rev
>> Log:
>> Revamps structural error detection / handling.
>>
>> Previously we'd only detect structural errors on the very first level.
>> This leads to incorrectly balanced braces not being discovered, and thus
>> incorrect indentation.
>>
>> This change fixes the problem by:
>> - changing the parser to use an error state that can be detected
>> anywhere inside the productions, for example if we get an eof on
>> SOME_MACRO({ some block <eof>
>> - previously we'd never break lines when we discovered a structural
>> error; now we break even in the case of a structural error if there
>> are two unwrapped lines within the same line; thus,
>> void f() { while (true) { g(); y(); } }
>> will still be re-formatted, even if there's missing braces somewhere
>> in the file
>> - still exclude macro definitions from generating structural error;
>> macro definitions are inbalanced snippets
>>
>> Modified:
>> cfe/trunk/lib/Format/Format.cpp
>> cfe/trunk/lib/Format/UnwrappedLineParser.cpp
>> cfe/trunk/lib/Format/UnwrappedLineParser.h
>> cfe/trunk/unittests/Format/FormatTest.cpp
>>
>> Modified: cfe/trunk/lib/Format/Format.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=179379&r1=179378&r2=179379&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/lib/Format/Format.cpp (original)
>> +++ cfe/trunk/lib/Format/Format.cpp Fri Apr 12 09:13:36 2013
>> @@ -459,7 +459,7 @@ public:
>> UnwrappedLineFormatter(const FormatStyle &Style, SourceManager
>> &SourceMgr,
>> const AnnotatedLine &Line, unsigned FirstIndent,
>> const AnnotatedToken &RootToken,
>> - WhitespaceManager &Whitespaces, bool
>> StructuralError)
>> + WhitespaceManager &Whitespaces)
>> : Style(Style), SourceMgr(SourceMgr), Line(Line),
>> FirstIndent(FirstIndent), RootToken(RootToken),
>> Whitespaces(Whitespaces), Count(0) {}
>> @@ -1381,7 +1381,7 @@ public:
>> tooling::Replacements format() {
>> LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
>> UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
>> - StructuralError = Parser.parse();
>> + bool StructuralError = Parser.parse();
>> unsigned PreviousEndOfLineColumn = 0;
>> TokenAnnotator Annotator(Style, SourceMgr, Lex,
>> Tokens.getIdentTable().get("in"));
>> @@ -1431,17 +1431,19 @@ public:
>> unsigned Indent = LevelIndent;
>> if (static_cast<int>(Indent) + Offset >= 0)
>> Indent += Offset;
>> - if (!FirstTok.WhiteSpaceStart.isValid() || StructuralError) {
>> - Indent = LevelIndent =
>> -
>> SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
>> - } else {
>> + if (FirstTok.WhiteSpaceStart.isValid() &&
>> + // Insert a break even if there is a structural error in
>> case where
>> + // we break apart a line consisting of multiple unwrapped
>> lines.
>> + (FirstTok.NewlinesBefore == 0 || !StructuralError)) {
>> formatFirstToken(TheLine.First, PreviousLineLastToken, Indent,
>> TheLine.InPPDirective,
>> PreviousEndOfLineColumn);
>> + } else {
>> + Indent = LevelIndent =
>> +
>> SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
>> }
>> tryFitMultipleLinesInOne(Indent, I, E);
>> UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine,
>> Indent,
>> - TheLine.First, Whitespaces,
>> - StructuralError);
>> + TheLine.First, Whitespaces);
>> PreviousEndOfLineColumn =
>> Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
>> IndentForLevel[TheLine.Level] = LevelIndent;
>> @@ -1742,7 +1744,6 @@ private:
>> WhitespaceManager Whitespaces;
>> std::vector<CharSourceRange> Ranges;
>> std::vector<AnnotatedLine> AnnotatedLines;
>> - bool StructuralError;
>> };
>>
>> tooling::Replacements
>>
>> Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=179379&r1=179378&r2=179379&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
>> +++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Fri Apr 12 09:13:36 2013
>> @@ -45,9 +45,11 @@ private:
>> class ScopedMacroState : public FormatTokenSource {
>> public:
>> ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
>> - FormatToken &ResetToken)
>> + FormatToken &ResetToken, bool &StructuralError)
>> : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
>> - PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
>> + PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
>> + StructuralError(StructuralError),
>> + PreviousStructuralError(StructuralError) {
>> TokenSource = this;
>> Line.Level = 0;
>> Line.InPPDirective = true;
>> @@ -58,6 +60,7 @@ public:
>> ResetToken = Token;
>> Line.InPPDirective = false;
>> Line.Level = PreviousLineLevel;
>> + StructuralError = PreviousStructuralError;
>> }
>>
>> virtual FormatToken getNextToken() {
>> @@ -85,6 +88,8 @@ private:
>> FormatToken &ResetToken;
>> unsigned PreviousLineLevel;
>> FormatTokenSource *PreviousTokenSource;
>> + bool &StructuralError;
>> + bool PreviousStructuralError;
>>
>> FormatToken Token;
>> };
>> @@ -124,13 +129,13 @@ UnwrappedLineParser::UnwrappedLineParser
>> clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
>> FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback)
>> : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
>> - CurrentLines(&Lines), Diag(Diag), Style(Style), Tokens(&Tokens),
>> - Callback(Callback) {}
>> + CurrentLines(&Lines), StructuralError(false), Diag(Diag),
>> Style(Style),
>> + Tokens(&Tokens), Callback(Callback) {}
>>
>> bool UnwrappedLineParser::parse() {
>> DEBUG(llvm::dbgs() << "----\n");
>> readToken();
>> - bool Error = parseFile();
>> + parseFile();
>> for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E =
>> Lines.end();
>> I != E; ++I) {
>> Callback.consumeUnwrappedLine(*I);
>> @@ -139,23 +144,20 @@ bool UnwrappedLineParser::parse() {
>> // Create line with eof token.
>> pushToken(FormatTok);
>> Callback.consumeUnwrappedLine(*Line);
>> -
>> - return Error;
>> + return StructuralError;
>> }
>>
>> -bool UnwrappedLineParser::parseFile() {
>> +void UnwrappedLineParser::parseFile() {
>> ScopedDeclarationState DeclarationState(
>> *Line, DeclarationScopeStack,
>> /*MustBeDeclaration=*/ !Line->InPPDirective);
>> - bool Error = parseLevel(/*HasOpeningBrace=*/ false);
>> + parseLevel(/*HasOpeningBrace=*/ false);
>> // Make sure to format the remaining tokens.
>> flushComments(true);
>> addUnwrappedLine();
>> - return Error;
>> }
>>
>> -bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
>> - bool Error = false;
>> +void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
>> do {
>> switch (FormatTok.Tok.getKind()) {
>> case tok::comment:
>> @@ -165,30 +167,27 @@ bool UnwrappedLineParser::parseLevel(boo
>> case tok::l_brace:
>> // FIXME: Add parameter whether this can happen - if this happens,
>> we must
>> // be in a non-declaration context.
>> - Error |= parseBlock(/*MustBeDeclaration=*/ false);
>> + parseBlock(/*MustBeDeclaration=*/ false);
>> addUnwrappedLine();
>> break;
>> case tok::r_brace:
>> - if (HasOpeningBrace) {
>> - return false;
>> - } else {
>> - Diag.Report(FormatTok.Tok.getLocation(),
>> - Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
>> - "unexpected '}'"));
>> - Error = true;
>> - nextToken();
>> - addUnwrappedLine();
>> - }
>> + if (HasOpeningBrace)
>> + return;
>> + Diag.Report(FormatTok.Tok.getLocation(),
>> + Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
>> + "unexpected '}'"));
>> + StructuralError = true;
>> + nextToken();
>> + addUnwrappedLine();
>> break;
>> default:
>> parseStructuralElement();
>> break;
>> }
>> } while (!eof());
>> - return Error;
>> }
>>
>> -bool UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
>> +void UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
>> unsigned AddLevels) {
>> assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
>> nextToken();
>> @@ -202,17 +201,17 @@ bool UnwrappedLineParser::parseBlock(boo
>>
>> if (!FormatTok.Tok.is(tok::r_brace)) {
>> Line->Level -= AddLevels;
>> - return true;
>> + StructuralError = true;
>> + return;
>> }
>>
>> nextToken(); // Munch the closing brace.
>> Line->Level -= AddLevels;
>> - return false;
>> }
>>
>> void UnwrappedLineParser::parsePPDirective() {
>> assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
>> - ScopedMacroState MacroState(*Line, Tokens, FormatTok);
>> + ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
>> nextToken();
>>
>> if (FormatTok.Tok.getIdentifierInfo() == NULL) {
>>
>> Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=179379&r1=179378&r2=179379&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
>> +++ cfe/trunk/lib/Format/UnwrappedLineParser.h Fri Apr 12 09:13:36 2013
>> @@ -125,9 +125,9 @@ public:
>> bool parse();
>>
>> private:
>> - bool parseFile();
>> - bool parseLevel(bool HasOpeningBrace);
>> - bool parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
>> + void parseFile();
>> + void parseLevel(bool HasOpeningBrace);
>> + void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
>> void parsePPDirective();
>> void parsePPDefine();
>> void parsePPUnknown();
>> @@ -187,6 +187,10 @@ private:
>> // whether we are in a compound statement or not.
>> std::vector<bool> DeclarationScopeStack;
>>
>> + // Will be true if we encounter an error that leads to possibily
>> incorrect
>> + // indentation levels.
>> + bool StructuralError;
>> +
>> clang::DiagnosticsEngine &Diag;
>> const FormatStyle &Style;
>> FormatTokenSource *Tokens;
>>
>> Modified: cfe/trunk/unittests/Format/FormatTest.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=179379&r1=179378&r2=179379&view=diff
>>
>> ==============================================================================
>> --- cfe/trunk/unittests/Format/FormatTest.cpp (original)
>> +++ cfe/trunk/unittests/Format/FormatTest.cpp Fri Apr 12 09:13:36 2013
>> @@ -430,6 +430,7 @@ TEST_F(FormatTest, FormatsSwitchStatemen
>> verifyFormat("switch (x) {\n"
>> "default: {\n"
>> " // Do nothing.\n"
>> + "}\n"
>> "}");
>> verifyFormat("switch (x) {\n"
>> "// comment\n"
>> @@ -3563,8 +3564,18 @@ TEST_F(FormatTest, ObjCLiterals) {
>> verifyFormat("@{ @\"one\" : @1 }");
>> verifyFormat("return @{ @\"one\" : @1 };");
>> verifyFormat("@{ @\"one\" : @1, }");
>> - verifyFormat("@{ @\"one\" : @{ @2 : @1 } }");
>> - verifyFormat("@{ @\"one\" : @{ @2 : @1 }, }");
>> +
>> + // FIXME: Breaking in cases where we think there's a structural error
>> + // showed that we're incorrectly parsing this code. We need to fix the
>> + // parsing here.
>>
>
> Any progress on this? This regressed working functionality.
>
Unfortunately I have no idea about Obj-C and a short search didn't get me a
BNS that explained what can be done here... One question is: do we really
need to special case every identifier with an "@" in front of it, or is
there a mode where "@{" for example gets output as a single token (or is
there a fundamental concept for @ in the language that I need to understand
first?)
Thx,
/Manuel
>
>
>> + verifyFormat("@{ @\"one\" : @\n"
>> + "{ @2 : @1 }\n"
>> + "}");
>> + verifyFormat("@{ @\"one\" : @\n"
>> + "{ @2 : @1 }\n"
>> + ",\n"
>> + "}");
>> +
>> verifyFormat("@{ 1 > 2 ? @\"one\" : @\"two\" : 1 > 2 ? @1 : @2 }");
>> verifyFormat("[self setDict:@{}");
>> verifyFormat("[self setDict:@{ @1 : @2 }");
>>
>>
>> _______________________________________________
>> cfe-commits mailing list
>> cfe-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130527/0036c12b/attachment.html>
More information about the cfe-commits
mailing list