[cfe-commits] r38833 - in /cfe/cfe/trunk: Parse/ParseDecl.cpp Parse/Parser.cpp include/clang/Basic/DiagnosticKinds.def include/clang/Parse/Parser.h

Wed Jul 11 09:25:02 PDT 2007

Author: sabre
Date: Wed Jul 11 11:25:01 2007
New Revision: 38833

URL: http://llvm.org/viewvc/llvm-project?rev=38833&view=rev
Log:
Start implementing error recovery, this implements test/Parser/recovery-1.c

Modified:
    cfe/cfe/trunk/Parse/ParseDecl.cpp
    cfe/cfe/trunk/Parse/Parser.cpp
    cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def
    cfe/cfe/trunk/include/clang/Parse/Parser.h

Modified: cfe/cfe/trunk/Parse/ParseDecl.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Parse/ParseDecl.cpp?rev=38833&r1=38832&r2=38833&view=diff

==============================================================================

--- cfe/cfe/trunk/Parse/ParseDecl.cpp (original)
+++ cfe/cfe/trunk/Parse/ParseDecl.cpp Wed Jul 11 11:25:01 2007
@@ -332,22 +332,14 @@
     // direct-declarator: '(' attributes declarator ')'   [TODO]
     // Example: 'char (*X)'   or 'int (*XX)(void)'
     ParseParenDeclarator(D);
-  } else if (Tok.getKind() == tok::l_square &&
-             D.mayOmitIdentifier()) {
-    // direct-abstract-declarator[opt] '[' assignment-expression[opt] ']'
-    // direct-abstract-declarator[opt] '[' '*' ']'
-    
-    // direct-abstract-declarator was not specified.  Remember that this is the
-    // place where the identifier would have been.
-    D.SetIdentifier(0, Tok.getLocation());
-    // Don't consume the '[', handle it below.
   } else if (D.mayOmitIdentifier()) {
     // This could be something simple like "int" (in which case the declarator
     // portion is empty), if an abstract-declarator is allowed.
     D.SetIdentifier(0, Tok.getLocation());
   } else {
-    // expected identifier or '(' or '['.
-    assert(0 && "ERROR: should recover!");
+    // Expected identifier or '('.
+    Diag(Tok, diag::err_expected_ident_lparen);
+    D.SetIdentifier(0, Tok.getLocation());
   }
   
   assert(D.isPastIdentifier() &&
@@ -389,6 +381,7 @@
 ///         identifier-list ',' identifier
 ///
 void Parser::ParseParenDeclarator(Declarator &D) {
+  SourceLocation LParenLoc = Tok.getLocation();
   ConsumeParen();
   
   // If we haven't past the identifier yet (or where the identifier would be
@@ -418,10 +411,14 @@
     // direct-declarator: '(' attributes declarator ')'   [TODO]
     if (isGrouping) {
       ParseDeclarator(D);
-      // expected ')': skip until we find ')'.
-     if (Tok.getKind() != tok::r_paren)
-        assert(0 && "Recover!");
-      ConsumeParen();
+      if (Tok.getKind() == tok::r_paren) {
+        ConsumeParen();
+      } else {
+        // expected ')': skip until we find ')'.
+        Diag(Tok, diag::err_expected_rparen);
+        Diag(LParenLoc, diag::err_matching);
+        SkipUntil(tok::r_paren);
+      }
       return;
     }
     
@@ -520,6 +517,7 @@
     HasPrototype = true;
   }
   
+  // FIXME: pop the scope.  
   
   // expected ')': skip until we find ')'.
   if (Tok.getKind() != tok::r_paren)
@@ -535,7 +533,7 @@
 /// [C99]   direct-declarator '[' type-qual-list[opt] '*' ']'
 void Parser::ParseBracketDeclarator(Declarator &D) {
   SourceLocation StartLoc = Tok.getLocation();
-  ConsumeSquare();
+  ConsumeBracket();
   
   // If valid, this location is the position where we read the 'static' keyword.
   SourceLocation StaticLoc;
@@ -570,15 +568,13 @@
         Diag(StaticLoc, diag::err_unspecified_vla_size_with_static);
       StaticLoc = SourceLocation();  // Drop the static.
       isStar = true;
-      ConsumeToken();
     } else {
       // Otherwise, the * must have been some expression (such as '*ptr') that
       // started an assign-expr.  We already consumed the token, but now we need
       // to reparse it.
-      // FIXME: There are two options here: first, we could push 'StarTok' and
-      // Tok back into the preprocessor as a macro expansion context, so they
-      // will be read again.  Second, we could parse the rest of the assign-expr
-      // then apply the dereference.
+      // FIXME: We must push 'StarTok' and Tok back into the preprocessor as a
+      // macro expansion context, so they will be read again. It is basically
+      // impossible to refudge the * in otherwise, due to cases like X[*p + 4].
       assert(0 && "FIXME: int X[*p] unimplemented");
     }
   }
@@ -588,7 +584,7 @@
     assert(0 && "expr parsing not impl yet!");
   }
   
-  ConsumeSquare();
+  ConsumeBracket();
   
   // If C99 isn't enabled, emit an ext-warn if the arg list wasn't empty and if
   // it was not a constant expression.

Modified: cfe/cfe/trunk/Parse/Parser.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/Parse/Parser.cpp?rev=38833&r1=38832&r2=38833&view=diff

==============================================================================
--- cfe/cfe/trunk/Parse/Parser.cpp (original)
+++ cfe/cfe/trunk/Parse/Parser.cpp Wed Jul 11 11:25:01 2007
@@ -22,6 +22,8 @@
   // Create the global scope, install it as the current scope.
   CurScope = new Scope(0);
   Tok.SetKind(tok::eof);
+  
+  ParenCount = BracketCount = BraceCount = 0;
 }
 
 Parser::~Parser() {
@@ -35,6 +37,92 @@
 }
 
 //===----------------------------------------------------------------------===//
+// Error recovery.
+//===----------------------------------------------------------------------===//
+
+/// SkipUntil - Read tokens until we get to the specified token, then consume
+/// it (unless DontConsume is true).  Because we cannot guarantee that the
+/// token will ever occur, this skips to the next token, or to some likely
+/// good stopping point.  If StopAtSemi is true, skipping will stop at a ';'
+/// character.
+/// 
+/// If SkipUntil finds the specified token, it returns true, otherwise it
+/// returns false.  
+bool Parser::SkipUntil(tok::TokenKind T, bool StopAtSemi, bool DontConsume) {
+  while (1) {
+    // If we found the token, stop and return true.
+    if (Tok.getKind() == T) {
+      if (DontConsume) {
+        // Noop, don't consume the token.
+      } else if (isTokenParen()) {
+        ConsumeParen();
+      } else if (isTokenBracket()) {
+        ConsumeBracket();
+      } else if (isTokenBrace()) {
+        ConsumeBrace();
+      } else if (T == tok::string_literal) {
+        ConsumeStringToken();
+      } else {
+        ConsumeToken();
+      }
+      return true;
+    }
+    
+    switch (Tok.getKind()) {
+    case tok::eof:
+      // Ran out of tokens.
+      return false;
+      
+    case tok::l_paren:
+      // Recursively skip properly-nested parens.
+      ConsumeParen();
+      SkipUntil(tok::r_paren);
+      break;
+    case tok::l_square:
+      // Recursively skip properly-nested square brackets.
+      ConsumeBracket();
+      SkipUntil(tok::r_square);
+      break;
+    case tok::l_brace:
+      // Recursively skip properly-nested braces.
+      ConsumeBrace();
+      SkipUntil(tok::r_brace);
+      break;
+      
+    // Okay, we found a ']' or '}' or ')', which we think should be balanced.
+    // Since the user wasn't looking for this token (if they were, it would
+    // already be handled), this isn't balanced.  If there is a LHS token at a
+    // higher level, we will assume that this matches the unbalanced token
+    // and return it.  Otherwise, this is a spurious RHS token, which we skip.
+    case tok::r_paren:
+      if (ParenCount) return false;  // Matches something.
+      ConsumeParen();
+      break;
+    case tok::r_square:
+      if (BracketCount) return false;  // Matches something.
+      ConsumeBracket();
+      break;
+    case tok::r_brace:
+      if (BraceCount) return false;  // Matches something.
+      ConsumeBrace();
+      break;
+      
+    case tok::string_literal:
+      ConsumeStringToken();
+      break;
+    case tok::semi:
+      if (StopAtSemi)
+        return false;
+      // FALL THROUGH.
+    default:
+      // Skip this token.
+      ConsumeToken();
+      break;
+    }
+  }  
+}
+
+//===----------------------------------------------------------------------===//
 // C99 6.9: External Definitions.
 //===----------------------------------------------------------------------===//
 
@@ -140,9 +228,8 @@
     ConsumeToken();
   } else {
     Diag(Tok, diag::err_parse_error);
-    // FIXME: skip to end of block or statement
-    while (Tok.getKind() != tok::semi && Tok.getKind() != tok::eof)
-      ConsumeToken();
+    // Skip to end of block or statement
+    SkipUntil(tok::r_brace, true);
     if (Tok.getKind() == tok::semi)
       ConsumeToken();
   }

Modified: cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def?rev=38833&r1=38832&r2=38833&view=diff

==============================================================================
--- cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def (original)
+++ cfe/cfe/trunk/include/clang/Basic/DiagnosticKinds.def Wed Jul 11 11:25:01 2007
@@ -255,8 +255,22 @@
 DIAG(ext_c99_array_usage, EXTENSION,
      "use of c99-specific array features")
 
+// Generic errors.
 DIAG(err_parse_error, ERROR,
      "parse error")
+DIAG(err_expected_ident_lparen, ERROR,
+     "expected identifier or '('")
+DIAG(err_expected_rparen, ERROR,
+     "expected ')'")
+
+/// err_matching - this is used as a continuation of a previous error, e.g. to 
+/// specify the '(' when we expected a ')'.  This should probably be some
+/// special sort of diagnostic kind to indicate that it is the second half of
+/// the previous diagnostic.
+DIAG(err_matching, ERROR,
+     "to match")
+
+     
 DIAG(err_invalid_decl_spec_combination, ERROR,
      "cannot combine with previous '%s' declaration specifier")
 DIAG(err_invalid_sign_spec, ERROR,

Modified: cfe/cfe/trunk/include/clang/Parse/Parser.h
URL: http://llvm.org/viewvc/llvm-project/cfe/cfe/trunk/include/clang/Parse/Parser.h?rev=38833&r1=38832&r2=38833&view=diff

==============================================================================
--- cfe/cfe/trunk/include/clang/Parse/Parser.h (original)
+++ cfe/cfe/trunk/include/clang/Parse/Parser.h Wed Jul 11 11:25:01 2007
@@ -33,6 +33,7 @@
   ParserActions &Actions;
   Diagnostic &Diags;
   Scope *CurScope;
+  unsigned short ParenCount, BracketCount, BraceCount;
   
   /// Tok - The current token we are peeking head.  All parsing methods assume
   /// that this is valid.
@@ -61,37 +62,95 @@
     Diag(Tok, DiagID, Msg);
   }
   
-  /// ConsumeToken - Consume the current 'peek token', lexing a new one and
-  /// returning the token kind.  This does not work will all kinds of tokens,
-  /// strings and parens must be consumed with custom methods below.
+  /// isTokenParen - Return true if the cur token is '(' or ')'.
+  bool isTokenParen() const {
+    return Tok.getKind() == tok::l_paren || Tok.getKind() == tok::r_paren;
+  }
+  /// isTokenBracket - Return true if the cur token is '[' or ']'.
+  bool isTokenBracket() const {
+    return Tok.getKind() == tok::l_square || Tok.getKind() == tok::r_square;
+  }
+  /// isTokenBrace - Return true if the cur token is '{' or '}'.
+  bool isTokenBrace() const {
+    return Tok.getKind() == tok::l_brace || Tok.getKind() == tok::r_brace;
+  }
+  
+  /// ConsumeToken - Consume the current 'peek token' and lex the next one.
+  /// This does not work with all kinds of tokens: strings and specific other
+  /// tokens must be consumed with custom methods below.
   void ConsumeToken() {
+    // Note: update Parser::SkipUntil if any other special tokens are added.
     assert(Tok.getKind() != tok::string_literal &&
-           Tok.getKind() != tok::l_paren &&
-           Tok.getKind() != tok::r_paren &&
-           Tok.getKind() != tok::l_square &&
-           Tok.getKind() != tok::r_square &&
+           !isTokenParen() && !isTokenBracket() && !isTokenBrace() &&
            "Should consume special tokens with Consume*Token");
     PP.Lex(Tok);
   }
   
-  /// ConsumeParen -  This consume method keeps the paren count up-to-date.
+  /// ConsumeParen - This consume method keeps the paren count up-to-date.
   ///
   void ConsumeParen() {
-    assert((Tok.getKind() == tok::l_paren ||
-            Tok.getKind() == tok::r_paren) && "wrong consume method");
+    assert(isTokenParen() && "wrong consume method");
+    if (Tok.getKind() == tok::l_paren)
+      ++ParenCount;
+    else if (ParenCount)
+      --ParenCount;       // Don't let unbalanced )'s drive the count negative.
     PP.Lex(Tok);
   }
-
-  /// ConsumeSquare -  This consume method keeps the bracket count up-to-date.
+  
+  /// ConsumeBracket - This consume method keeps the bracket count up-to-date.
   ///
-  void ConsumeSquare() {
-    assert((Tok.getKind() == tok::l_square ||
-            Tok.getKind() == tok::r_square) && "wrong consume method");
+  void ConsumeBracket() {
+    assert(isTokenBracket() && "wrong consume method");
+    if (Tok.getKind() == tok::l_square)
+      ++BracketCount;
+    else if (BracketCount)
+      --BracketCount;     // Don't let unbalanced ]'s drive the count negative.
+    
     PP.Lex(Tok);
   }
+      
+  /// ConsumeBrace - This consume method keeps the brace count up-to-date.
+  ///
+  void ConsumeBrace() {
+    assert(isTokenBrace() && "wrong consume method");
+    if (Tok.getKind() == tok::l_brace)
+      ++BraceCount;
+    else if (BraceCount)
+      --BraceCount;     // Don't let unbalanced }'s drive the count negative.
+    
+    PP.Lex(Tok);
+  }
+  
+  
+  /// ConsumeStringToken - Consume the current 'peek token' and lex the next
+  /// one.  This method is specific to strings, as it handles string literal
+  /// concatenation, as per C99 5.1.1.2, translation phase #6: every adjacent
+  /// string literal token is consumed in one call.
+  void ConsumeStringToken() {
+    assert(Tok.getKind() != tok::string_literal &&
+           "Should consume special tokens with Consume*Token");
+    // Due to string literal concatenation, all consecutive string literals are
+    // a single token.
+    while (Tok.getKind() == tok::string_literal)
+      PP.Lex(Tok);
+  }
   
 private:
   //===--------------------------------------------------------------------===//
+  // Error recovery.
+    
+  /// SkipUntil - Read tokens until we get to the specified token, then consume
+  /// it (unless DontConsume is true).  Because we cannot guarantee that the
+  /// token will ever occur, this skips to the next token, or to some likely
+  /// good stopping point.  If StopAtSemi is true, skipping will stop at a ';'
+  /// character.
+  /// 
+  /// If SkipUntil finds the specified token, it returns true, otherwise it
+  /// returns false.  
+  bool SkipUntil(tok::TokenKind T, bool StopAtSemi = false,
+                 bool DontConsume = false);
+    
+  //===--------------------------------------------------------------------===//
   // C99 6.9: External Definitions.
   void ParseExternalDeclaration();
   void ParseDeclarationOrFunctionDefinition();