r179379 - Revamps structural error detection / handling.
Daniel Jasper
djasper at google.com
Tue May 28 11:51:26 PDT 2013
This might work again with r182788. Please give it a try.
On Mon, May 27, 2013 at 8:05 AM, Manuel Klimek <klimek at google.com> wrote:
> On Sun, May 26, 2013 at 7:45 AM, Nico Weber <thakis at chromium.org> wrote:
>
>> On Fri, Apr 12, 2013 at 7:13 AM, Manuel Klimek <klimek at google.com> wrote:
>>
>>> Author: klimek
>>> Date: Fri Apr 12 09:13:36 2013
>>> New Revision: 179379
>>>
>>> URL: http://llvm.org/viewvc/llvm-project?rev=179379&view=rev
>>> Log:
>>> Revamps structural error detection / handling.
>>>
>>> Previously we'd only detect structural errors on the very first level.
>>> This leads to incorrectly balanced braces not being discovered, and thus
>>> incorrect indentation.
>>>
>>> This change fixes the problem by:
>>> - changing the parser to use an error state that can be detected
>>> anywhere inside the productions, for example if we get an eof on
>>> SOME_MACRO({ some block <eof>
>>> - previously we'd never break lines when we discovered a structural
>>> error; now we break even in the case of a structural error if there
>>> are two unwrapped lines within the same line; thus,
>>> void f() { while (true) { g(); y(); } }
>>> will still be re-formatted, even if there's missing braces somewhere
>>> in the file
>>> - still exclude macro definitions from generating structural error;
>>> macro definitions are inbalanced snippets
>>>
>>> Modified:
>>> cfe/trunk/lib/Format/Format.cpp
>>> cfe/trunk/lib/Format/UnwrappedLineParser.cpp
>>> cfe/trunk/lib/Format/UnwrappedLineParser.h
>>> cfe/trunk/unittests/Format/FormatTest.cpp
>>>
>>> Modified: cfe/trunk/lib/Format/Format.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=179379&r1=179378&r2=179379&view=diff
>>>
>>> ==============================================================================
>>> --- cfe/trunk/lib/Format/Format.cpp (original)
>>> +++ cfe/trunk/lib/Format/Format.cpp Fri Apr 12 09:13:36 2013
>>> @@ -459,7 +459,7 @@ public:
>>> UnwrappedLineFormatter(const FormatStyle &Style, SourceManager
>>> &SourceMgr,
>>> const AnnotatedLine &Line, unsigned
>>> FirstIndent,
>>> const AnnotatedToken &RootToken,
>>> - WhitespaceManager &Whitespaces, bool
>>> StructuralError)
>>> + WhitespaceManager &Whitespaces)
>>> : Style(Style), SourceMgr(SourceMgr), Line(Line),
>>> FirstIndent(FirstIndent), RootToken(RootToken),
>>> Whitespaces(Whitespaces), Count(0) {}
>>> @@ -1381,7 +1381,7 @@ public:
>>> tooling::Replacements format() {
>>> LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
>>> UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
>>> - StructuralError = Parser.parse();
>>> + bool StructuralError = Parser.parse();
>>> unsigned PreviousEndOfLineColumn = 0;
>>> TokenAnnotator Annotator(Style, SourceMgr, Lex,
>>> Tokens.getIdentTable().get("in"));
>>> @@ -1431,17 +1431,19 @@ public:
>>> unsigned Indent = LevelIndent;
>>> if (static_cast<int>(Indent) + Offset >= 0)
>>> Indent += Offset;
>>> - if (!FirstTok.WhiteSpaceStart.isValid() || StructuralError) {
>>> - Indent = LevelIndent =
>>> -
>>> SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
>>> - } else {
>>> + if (FirstTok.WhiteSpaceStart.isValid() &&
>>> + // Insert a break even if there is a structural error in
>>> case where
>>> + // we break apart a line consisting of multiple unwrapped
>>> lines.
>>> + (FirstTok.NewlinesBefore == 0 || !StructuralError)) {
>>> formatFirstToken(TheLine.First, PreviousLineLastToken, Indent,
>>> TheLine.InPPDirective,
>>> PreviousEndOfLineColumn);
>>> + } else {
>>> + Indent = LevelIndent =
>>> +
>>> SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
>>> }
>>> tryFitMultipleLinesInOne(Indent, I, E);
>>> UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine,
>>> Indent,
>>> - TheLine.First, Whitespaces,
>>> - StructuralError);
>>> + TheLine.First, Whitespaces);
>>> PreviousEndOfLineColumn =
>>> Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
>>> IndentForLevel[TheLine.Level] = LevelIndent;
>>> @@ -1742,7 +1744,6 @@ private:
>>> WhitespaceManager Whitespaces;
>>> std::vector<CharSourceRange> Ranges;
>>> std::vector<AnnotatedLine> AnnotatedLines;
>>> - bool StructuralError;
>>> };
>>>
>>> tooling::Replacements
>>>
>>> Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=179379&r1=179378&r2=179379&view=diff
>>>
>>> ==============================================================================
>>> --- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
>>> +++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Fri Apr 12 09:13:36 2013
>>> @@ -45,9 +45,11 @@ private:
>>> class ScopedMacroState : public FormatTokenSource {
>>> public:
>>> ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
>>> - FormatToken &ResetToken)
>>> + FormatToken &ResetToken, bool &StructuralError)
>>> : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
>>> - PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource)
>>> {
>>> + PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
>>> + StructuralError(StructuralError),
>>> + PreviousStructuralError(StructuralError) {
>>> TokenSource = this;
>>> Line.Level = 0;
>>> Line.InPPDirective = true;
>>> @@ -58,6 +60,7 @@ public:
>>> ResetToken = Token;
>>> Line.InPPDirective = false;
>>> Line.Level = PreviousLineLevel;
>>> + StructuralError = PreviousStructuralError;
>>> }
>>>
>>> virtual FormatToken getNextToken() {
>>> @@ -85,6 +88,8 @@ private:
>>> FormatToken &ResetToken;
>>> unsigned PreviousLineLevel;
>>> FormatTokenSource *PreviousTokenSource;
>>> + bool &StructuralError;
>>> + bool PreviousStructuralError;
>>>
>>> FormatToken Token;
>>> };
>>> @@ -124,13 +129,13 @@ UnwrappedLineParser::UnwrappedLineParser
>>> clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
>>> FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback)
>>> : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
>>> - CurrentLines(&Lines), Diag(Diag), Style(Style), Tokens(&Tokens),
>>> - Callback(Callback) {}
>>> + CurrentLines(&Lines), StructuralError(false), Diag(Diag),
>>> Style(Style),
>>> + Tokens(&Tokens), Callback(Callback) {}
>>>
>>> bool UnwrappedLineParser::parse() {
>>> DEBUG(llvm::dbgs() << "----\n");
>>> readToken();
>>> - bool Error = parseFile();
>>> + parseFile();
>>> for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E =
>>> Lines.end();
>>> I != E; ++I) {
>>> Callback.consumeUnwrappedLine(*I);
>>> @@ -139,23 +144,20 @@ bool UnwrappedLineParser::parse() {
>>> // Create line with eof token.
>>> pushToken(FormatTok);
>>> Callback.consumeUnwrappedLine(*Line);
>>> -
>>> - return Error;
>>> + return StructuralError;
>>> }
>>>
>>> -bool UnwrappedLineParser::parseFile() {
>>> +void UnwrappedLineParser::parseFile() {
>>> ScopedDeclarationState DeclarationState(
>>> *Line, DeclarationScopeStack,
>>> /*MustBeDeclaration=*/ !Line->InPPDirective);
>>> - bool Error = parseLevel(/*HasOpeningBrace=*/ false);
>>> + parseLevel(/*HasOpeningBrace=*/ false);
>>> // Make sure to format the remaining tokens.
>>> flushComments(true);
>>> addUnwrappedLine();
>>> - return Error;
>>> }
>>>
>>> -bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
>>> - bool Error = false;
>>> +void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
>>> do {
>>> switch (FormatTok.Tok.getKind()) {
>>> case tok::comment:
>>> @@ -165,30 +167,27 @@ bool UnwrappedLineParser::parseLevel(boo
>>> case tok::l_brace:
>>> // FIXME: Add parameter whether this can happen - if this
>>> happens, we must
>>> // be in a non-declaration context.
>>> - Error |= parseBlock(/*MustBeDeclaration=*/ false);
>>> + parseBlock(/*MustBeDeclaration=*/ false);
>>> addUnwrappedLine();
>>> break;
>>> case tok::r_brace:
>>> - if (HasOpeningBrace) {
>>> - return false;
>>> - } else {
>>> - Diag.Report(FormatTok.Tok.getLocation(),
>>> -
>>> Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
>>> - "unexpected '}'"));
>>> - Error = true;
>>> - nextToken();
>>> - addUnwrappedLine();
>>> - }
>>> + if (HasOpeningBrace)
>>> + return;
>>> + Diag.Report(FormatTok.Tok.getLocation(),
>>> + Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
>>> + "unexpected '}'"));
>>> + StructuralError = true;
>>> + nextToken();
>>> + addUnwrappedLine();
>>> break;
>>> default:
>>> parseStructuralElement();
>>> break;
>>> }
>>> } while (!eof());
>>> - return Error;
>>> }
>>>
>>> -bool UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
>>> +void UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
>>> unsigned AddLevels) {
>>> assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
>>> nextToken();
>>> @@ -202,17 +201,17 @@ bool UnwrappedLineParser::parseBlock(boo
>>>
>>> if (!FormatTok.Tok.is(tok::r_brace)) {
>>> Line->Level -= AddLevels;
>>> - return true;
>>> + StructuralError = true;
>>> + return;
>>> }
>>>
>>> nextToken(); // Munch the closing brace.
>>> Line->Level -= AddLevels;
>>> - return false;
>>> }
>>>
>>> void UnwrappedLineParser::parsePPDirective() {
>>> assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
>>> - ScopedMacroState MacroState(*Line, Tokens, FormatTok);
>>> + ScopedMacroState MacroState(*Line, Tokens, FormatTok,
>>> StructuralError);
>>> nextToken();
>>>
>>> if (FormatTok.Tok.getIdentifierInfo() == NULL) {
>>>
>>> Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=179379&r1=179378&r2=179379&view=diff
>>>
>>> ==============================================================================
>>> --- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
>>> +++ cfe/trunk/lib/Format/UnwrappedLineParser.h Fri Apr 12 09:13:36 2013
>>> @@ -125,9 +125,9 @@ public:
>>> bool parse();
>>>
>>> private:
>>> - bool parseFile();
>>> - bool parseLevel(bool HasOpeningBrace);
>>> - bool parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
>>> + void parseFile();
>>> + void parseLevel(bool HasOpeningBrace);
>>> + void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
>>> void parsePPDirective();
>>> void parsePPDefine();
>>> void parsePPUnknown();
>>> @@ -187,6 +187,10 @@ private:
>>> // whether we are in a compound statement or not.
>>> std::vector<bool> DeclarationScopeStack;
>>>
>>> + // Will be true if we encounter an error that leads to possibily
>>> incorrect
>>> + // indentation levels.
>>> + bool StructuralError;
>>> +
>>> clang::DiagnosticsEngine &Diag;
>>> const FormatStyle &Style;
>>> FormatTokenSource *Tokens;
>>>
>>> Modified: cfe/trunk/unittests/Format/FormatTest.cpp
>>> URL:
>>> http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=179379&r1=179378&r2=179379&view=diff
>>>
>>> ==============================================================================
>>> --- cfe/trunk/unittests/Format/FormatTest.cpp (original)
>>> +++ cfe/trunk/unittests/Format/FormatTest.cpp Fri Apr 12 09:13:36 2013
>>> @@ -430,6 +430,7 @@ TEST_F(FormatTest, FormatsSwitchStatemen
>>> verifyFormat("switch (x) {\n"
>>> "default: {\n"
>>> " // Do nothing.\n"
>>> + "}\n"
>>> "}");
>>> verifyFormat("switch (x) {\n"
>>> "// comment\n"
>>> @@ -3563,8 +3564,18 @@ TEST_F(FormatTest, ObjCLiterals) {
>>> verifyFormat("@{ @\"one\" : @1 }");
>>> verifyFormat("return @{ @\"one\" : @1 };");
>>> verifyFormat("@{ @\"one\" : @1, }");
>>> - verifyFormat("@{ @\"one\" : @{ @2 : @1 } }");
>>> - verifyFormat("@{ @\"one\" : @{ @2 : @1 }, }");
>>> +
>>> + // FIXME: Breaking in cases where we think there's a structural error
>>> + // showed that we're incorrectly parsing this code. We need to fix the
>>> + // parsing here.
>>>
>>
>> Any progress on this? This regressed working functionality.
>>
>
> Unfortunately I have no idea about Obj-C and a short search didn't get me
> a BNS that explained what can be done here... One question is: do we really
> need to special case every identifier with an "@" in front of it, or is
> there a mode where "@{" for example gets output as a single token (or is
> there a fundamental concept for @ in the language that I need to understand
> first?)
>
> Thx,
> /Manuel
>
>
>>
>>
>>> + verifyFormat("@{ @\"one\" : @\n"
>>> + "{ @2 : @1 }\n"
>>> + "}");
>>> + verifyFormat("@{ @\"one\" : @\n"
>>> + "{ @2 : @1 }\n"
>>> + ",\n"
>>> + "}");
>>> +
>>> verifyFormat("@{ 1 > 2 ? @\"one\" : @\"two\" : 1 > 2 ? @1 : @2 }");
>>> verifyFormat("[self setDict:@{}");
>>> verifyFormat("[self setDict:@{ @1 : @2 }");
>>>
>>>
>>> _______________________________________________
>>> cfe-commits mailing list
>>> cfe-commits at cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>>>
>>
>>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130528/da060d4b/attachment.html>
More information about the cfe-commits
mailing list