r179379 - Revamps structural error detection / handling.

Manuel Klimek klimek at google.com
Fri Apr 12 07:13:36 PDT 2013


Author: klimek
Date: Fri Apr 12 09:13:36 2013
New Revision: 179379

URL: http://llvm.org/viewvc/llvm-project?rev=179379&view=rev
Log:
Revamps structural error detection / handling.

Previously we'd only detect structural errors on the very first level.
This leads to incorrectly balanced braces not being discovered, and thus
incorrect indentation.

This change fixes the problem by:
- changing the parser to use an error state that can be detected
  anywhere inside the productions, for example if we get an eof on
  SOME_MACRO({ some block <eof>
- previously we'd never break lines when we discovered a structural
  error; now we break even in the case of a structural error if there
  are two unwrapped lines within the same line; thus,
  void f() { while (true) { g(); y(); } }
  will still be re-formatted, even if there's missing braces somewhere
  in the file
- still exclude macro definitions from generating structural error;
  macro definitions are inbalanced snippets

Modified:
    cfe/trunk/lib/Format/Format.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.cpp
    cfe/trunk/lib/Format/UnwrappedLineParser.h
    cfe/trunk/unittests/Format/FormatTest.cpp

Modified: cfe/trunk/lib/Format/Format.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=179379&r1=179378&r2=179379&view=diff
==============================================================================
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Fri Apr 12 09:13:36 2013
@@ -459,7 +459,7 @@ public:
   UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
                          const AnnotatedLine &Line, unsigned FirstIndent,
                          const AnnotatedToken &RootToken,
-                         WhitespaceManager &Whitespaces, bool StructuralError)
+                         WhitespaceManager &Whitespaces)
       : Style(Style), SourceMgr(SourceMgr), Line(Line),
         FirstIndent(FirstIndent), RootToken(RootToken),
         Whitespaces(Whitespaces), Count(0) {}
@@ -1381,7 +1381,7 @@ public:
   tooling::Replacements format() {
     LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
     UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
-    StructuralError = Parser.parse();
+    bool StructuralError = Parser.parse();
     unsigned PreviousEndOfLineColumn = 0;
     TokenAnnotator Annotator(Style, SourceMgr, Lex,
                              Tokens.getIdentTable().get("in"));
@@ -1431,17 +1431,19 @@ public:
         unsigned Indent = LevelIndent;
         if (static_cast<int>(Indent) + Offset >= 0)
           Indent += Offset;
-        if (!FirstTok.WhiteSpaceStart.isValid() || StructuralError) {
-          Indent = LevelIndent =
-              SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
-        } else {
+        if (FirstTok.WhiteSpaceStart.isValid() &&
+            // Insert a break even if there is a structural error in case where
+            // we break apart a line consisting of multiple unwrapped lines.
+            (FirstTok.NewlinesBefore == 0 || !StructuralError)) {
           formatFirstToken(TheLine.First, PreviousLineLastToken, Indent,
                            TheLine.InPPDirective, PreviousEndOfLineColumn);
+        } else {
+          Indent = LevelIndent =
+              SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
         }
         tryFitMultipleLinesInOne(Indent, I, E);
         UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
-                                         TheLine.First, Whitespaces,
-                                         StructuralError);
+                                         TheLine.First, Whitespaces);
         PreviousEndOfLineColumn =
             Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
         IndentForLevel[TheLine.Level] = LevelIndent;
@@ -1742,7 +1744,6 @@ private:
   WhitespaceManager Whitespaces;
   std::vector<CharSourceRange> Ranges;
   std::vector<AnnotatedLine> AnnotatedLines;
-  bool StructuralError;
 };
 
 tooling::Replacements

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.cpp?rev=179379&r1=179378&r2=179379&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.cpp (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.cpp Fri Apr 12 09:13:36 2013
@@ -45,9 +45,11 @@ private:
 class ScopedMacroState : public FormatTokenSource {
 public:
   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
-                   FormatToken &ResetToken)
+                   FormatToken &ResetToken, bool &StructuralError)
       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
-        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
+        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
+        StructuralError(StructuralError),
+        PreviousStructuralError(StructuralError) {
     TokenSource = this;
     Line.Level = 0;
     Line.InPPDirective = true;
@@ -58,6 +60,7 @@ public:
     ResetToken = Token;
     Line.InPPDirective = false;
     Line.Level = PreviousLineLevel;
+    StructuralError = PreviousStructuralError;
   }
 
   virtual FormatToken getNextToken() {
@@ -85,6 +88,8 @@ private:
   FormatToken &ResetToken;
   unsigned PreviousLineLevel;
   FormatTokenSource *PreviousTokenSource;
+  bool &StructuralError;
+  bool PreviousStructuralError;
 
   FormatToken Token;
 };
@@ -124,13 +129,13 @@ UnwrappedLineParser::UnwrappedLineParser
     clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
     FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback)
     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
-      CurrentLines(&Lines), Diag(Diag), Style(Style), Tokens(&Tokens),
-      Callback(Callback) {}
+      CurrentLines(&Lines), StructuralError(false), Diag(Diag), Style(Style),
+      Tokens(&Tokens), Callback(Callback) {}
 
 bool UnwrappedLineParser::parse() {
   DEBUG(llvm::dbgs() << "----\n");
   readToken();
-  bool Error = parseFile();
+  parseFile();
   for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end();
        I != E; ++I) {
     Callback.consumeUnwrappedLine(*I);
@@ -139,23 +144,20 @@ bool UnwrappedLineParser::parse() {
   // Create line with eof token.
   pushToken(FormatTok);
   Callback.consumeUnwrappedLine(*Line);
-
-  return Error;
+  return StructuralError;
 }
 
-bool UnwrappedLineParser::parseFile() {
+void UnwrappedLineParser::parseFile() {
   ScopedDeclarationState DeclarationState(
       *Line, DeclarationScopeStack,
       /*MustBeDeclaration=*/ !Line->InPPDirective);
-  bool Error = parseLevel(/*HasOpeningBrace=*/ false);
+  parseLevel(/*HasOpeningBrace=*/ false);
   // Make sure to format the remaining tokens.
   flushComments(true);
   addUnwrappedLine();
-  return Error;
 }
 
-bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
-  bool Error = false;
+void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
   do {
     switch (FormatTok.Tok.getKind()) {
     case tok::comment:
@@ -165,30 +167,27 @@ bool UnwrappedLineParser::parseLevel(boo
     case tok::l_brace:
       // FIXME: Add parameter whether this can happen - if this happens, we must
       // be in a non-declaration context.
-      Error |= parseBlock(/*MustBeDeclaration=*/ false);
+      parseBlock(/*MustBeDeclaration=*/ false);
       addUnwrappedLine();
       break;
     case tok::r_brace:
-      if (HasOpeningBrace) {
-        return false;
-      } else {
-        Diag.Report(FormatTok.Tok.getLocation(),
-                    Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
-                                         "unexpected '}'"));
-        Error = true;
-        nextToken();
-        addUnwrappedLine();
-      }
+      if (HasOpeningBrace)
+        return;
+      Diag.Report(FormatTok.Tok.getLocation(),
+                  Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
+                                       "unexpected '}'"));
+      StructuralError = true;
+      nextToken();
+      addUnwrappedLine();
       break;
     default:
       parseStructuralElement();
       break;
     }
   } while (!eof());
-  return Error;
 }
 
-bool UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
+void UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
                                      unsigned AddLevels) {
   assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
   nextToken();
@@ -202,17 +201,17 @@ bool UnwrappedLineParser::parseBlock(boo
 
   if (!FormatTok.Tok.is(tok::r_brace)) {
     Line->Level -= AddLevels;
-    return true;
+    StructuralError = true;
+    return;
   }
 
   nextToken(); // Munch the closing brace.
   Line->Level -= AddLevels;
-  return false;
 }
 
 void UnwrappedLineParser::parsePPDirective() {
   assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
-  ScopedMacroState MacroState(*Line, Tokens, FormatTok);
+  ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
   nextToken();
 
   if (FormatTok.Tok.getIdentifierInfo() == NULL) {

Modified: cfe/trunk/lib/Format/UnwrappedLineParser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/UnwrappedLineParser.h?rev=179379&r1=179378&r2=179379&view=diff
==============================================================================
--- cfe/trunk/lib/Format/UnwrappedLineParser.h (original)
+++ cfe/trunk/lib/Format/UnwrappedLineParser.h Fri Apr 12 09:13:36 2013
@@ -125,9 +125,9 @@ public:
   bool parse();
 
 private:
-  bool parseFile();
-  bool parseLevel(bool HasOpeningBrace);
-  bool parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
+  void parseFile();
+  void parseLevel(bool HasOpeningBrace);
+  void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
   void parsePPDirective();
   void parsePPDefine();
   void parsePPUnknown();
@@ -187,6 +187,10 @@ private:
   // whether we are in a compound statement or not.
   std::vector<bool> DeclarationScopeStack;
 
+  // Will be true if we encounter an error that leads to possibily incorrect
+  // indentation levels.
+  bool StructuralError;
+
   clang::DiagnosticsEngine &Diag;
   const FormatStyle &Style;
   FormatTokenSource *Tokens;

Modified: cfe/trunk/unittests/Format/FormatTest.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=179379&r1=179378&r2=179379&view=diff
==============================================================================
--- cfe/trunk/unittests/Format/FormatTest.cpp (original)
+++ cfe/trunk/unittests/Format/FormatTest.cpp Fri Apr 12 09:13:36 2013
@@ -430,6 +430,7 @@ TEST_F(FormatTest, FormatsSwitchStatemen
   verifyFormat("switch (x) {\n"
                "default: {\n"
                "  // Do nothing.\n"
+               "}\n"
                "}");
   verifyFormat("switch (x) {\n"
                "// comment\n"
@@ -3563,8 +3564,18 @@ TEST_F(FormatTest, ObjCLiterals) {
   verifyFormat("@{ @\"one\" : @1 }");
   verifyFormat("return @{ @\"one\" : @1 };");
   verifyFormat("@{ @\"one\" : @1, }");
-  verifyFormat("@{ @\"one\" : @{ @2 : @1 } }");
-  verifyFormat("@{ @\"one\" : @{ @2 : @1 }, }");
+
+  // FIXME: Breaking in cases where we think there's a structural error
+  // showed that we're incorrectly parsing this code. We need to fix the
+  // parsing here.
+  verifyFormat("@{ @\"one\" : @\n"
+               "{ @2 : @1 }\n"
+               "}");
+  verifyFormat("@{ @\"one\" : @\n"
+               "{ @2 : @1 }\n"
+               ",\n"
+               "}");
+
   verifyFormat("@{ 1 > 2 ? @\"one\" : @\"two\" : 1 > 2 ? @1 : @2 }");
   verifyFormat("[self setDict:@{}");
   verifyFormat("[self setDict:@{ @1 : @2 }");





More information about the cfe-commits mailing list