[cfe-commits] r66011 - in /cfe/trunk: Driver/PrintParserCallbacks.cpp include/clang/AST/Stmt.h include/clang/Parse/Action.h lib/AST/Stmt.cpp lib/AST/StmtSerialization.cpp lib/Parse/ParseStmt.cpp lib/Sema/Sema.h lib/Sema/SemaStmt.cpp

Chris Lattner sabre at nondot.org
Tue Mar 3 20:23:07 PST 2009


Author: lattner
Date: Tue Mar  3 22:23:07 2009
New Revision: 66011

URL: http://llvm.org/viewvc/llvm-project?rev=66011&view=rev
Log:
Change Parser::ParseCaseStatement to use an iterative approach to parsing
multiple sequential case statements instead of doing it with recursion.  This
fixes a problem where we run out of stack space parsing 100K directly nested
cases.

There are a couple other problems that prevent this from being useful in 
practice (right now the example only parses correctly with -disable-free and
doesn't work with -emit-llvm), but this is a start.

I'm not including a testcase because it is large and uninteresting for 
regtesting.

Sebastian, I would appreciate it if you could scrutinize the smart pointer 
gymnastics I do.

Modified:
    cfe/trunk/Driver/PrintParserCallbacks.cpp
    cfe/trunk/include/clang/AST/Stmt.h
    cfe/trunk/include/clang/Parse/Action.h
    cfe/trunk/lib/AST/Stmt.cpp
    cfe/trunk/lib/AST/StmtSerialization.cpp
    cfe/trunk/lib/Parse/ParseStmt.cpp
    cfe/trunk/lib/Sema/Sema.h
    cfe/trunk/lib/Sema/SemaStmt.cpp

Modified: cfe/trunk/Driver/PrintParserCallbacks.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/Driver/PrintParserCallbacks.cpp?rev=66011&r1=66010&r2=66011&view=diff

==============================================================================
--- cfe/trunk/Driver/PrintParserCallbacks.cpp (original)
+++ cfe/trunk/Driver/PrintParserCallbacks.cpp Tue Mar  3 22:23:07 2009
@@ -272,8 +272,7 @@
                                            ExprArg LHSVal,
                                            SourceLocation DotDotDotLoc,
                                            ExprArg RHSVal,
-                                           SourceLocation ColonLoc,
-                                           StmtArg SubStmt) {
+                                           SourceLocation ColonLoc) {
       llvm::cout << __FUNCTION__ << "\n";
       return StmtEmpty();
     }

Modified: cfe/trunk/include/clang/AST/Stmt.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/Stmt.h?rev=66011&r1=66010&r2=66011&view=diff

==============================================================================
--- cfe/trunk/include/clang/AST/Stmt.h (original)
+++ cfe/trunk/include/clang/AST/Stmt.h Tue Mar  3 22:23:07 2009
@@ -147,7 +147,7 @@
   }
   virtual ~Stmt() {}
   
-  virtual void Destroy(ASTContext& Ctx);
+  virtual void Destroy(ASTContext &Ctx);
 
   StmtClass getStmtClass() const { return sClass; }
   const char *getStmtClassName() const;
@@ -440,9 +440,9 @@
                              // GNU "case 1 ... 4" extension
   SourceLocation CaseLoc;
 public:
-  CaseStmt(Expr *lhs, Expr *rhs, Stmt *substmt, SourceLocation caseLoc) 
+  CaseStmt(Expr *lhs, Expr *rhs, SourceLocation caseLoc) 
     : SwitchCase(CaseStmtClass) {
-    SubExprs[SUBSTMT] = substmt;
+    SubExprs[SUBSTMT] = 0;
     SubExprs[LHS] = reinterpret_cast<Stmt*>(lhs);
     SubExprs[RHS] = reinterpret_cast<Stmt*>(rhs);
     CaseLoc = caseLoc;

Modified: cfe/trunk/include/clang/Parse/Action.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Parse/Action.h?rev=66011&r1=66010&r2=66011&view=diff

==============================================================================
--- cfe/trunk/include/clang/Parse/Action.h (original)
+++ cfe/trunk/include/clang/Parse/Action.h Tue Mar  3 22:23:07 2009
@@ -406,12 +406,18 @@
   }
 
   /// ActOnCaseStmt - Note that this handles the GNU 'case 1 ... 4' extension,
-  /// which can specify an RHS value.
+  /// which can specify an RHS value.  The sub-statement of the case is
+  /// specified in a separate action.
   virtual OwningStmtResult ActOnCaseStmt(SourceLocation CaseLoc, ExprArg LHSVal,
-                                    SourceLocation DotDotDotLoc, ExprArg RHSVal,
-                                    SourceLocation ColonLoc, StmtArg SubStmt) {
+                                         SourceLocation DotDotDotLoc,
+                                         ExprArg RHSVal,
+                                         SourceLocation ColonLoc) {
     return StmtEmpty();
   }
+  
+  /// ActOnCaseStmtBody - This installs a statement as the body of a case.
+  virtual void ActOnCaseStmtBody(StmtTy *CaseStmt, StmtArg SubStmt) {}
+  
   virtual OwningStmtResult ActOnDefaultStmt(SourceLocation DefaultLoc,
                                             SourceLocation ColonLoc,
                                             StmtArg SubStmt, Scope *CurScope){

Modified: cfe/trunk/lib/AST/Stmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/Stmt.cpp?rev=66011&r1=66010&r2=66011&view=diff

==============================================================================
--- cfe/trunk/lib/AST/Stmt.cpp (original)
+++ cfe/trunk/lib/AST/Stmt.cpp Tue Mar  3 22:23:07 2009
@@ -43,13 +43,12 @@
   return getStmtInfoTableEntry(sClass).Name;
 }
 
-void Stmt::DestroyChildren(ASTContext& C) {
-  for (child_iterator I = child_begin(), E = child_end(); I !=E; ) {
+void Stmt::DestroyChildren(ASTContext &C) {
+  for (child_iterator I = child_begin(), E = child_end(); I !=E; )
     if (Stmt* Child = *I++) Child->Destroy(C);
-  }
 }
 
-void Stmt::Destroy(ASTContext& C) {
+void Stmt::Destroy(ASTContext &C) {
   DestroyChildren(C);
   // FIXME: Eventually all Stmts should be allocated with the allocator
   //  in ASTContext, just like with Decls.
@@ -57,7 +56,7 @@
   C.Deallocate((void *)this);
 }
 
-void DeclStmt::Destroy(ASTContext& C) {
+void DeclStmt::Destroy(ASTContext &C) {
   this->~DeclStmt();
   C.Deallocate((void *)this);
 }

Modified: cfe/trunk/lib/AST/StmtSerialization.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/StmtSerialization.cpp?rev=66011&r1=66010&r2=66011&view=diff

==============================================================================
--- cfe/trunk/lib/AST/StmtSerialization.cpp (original)
+++ cfe/trunk/lib/AST/StmtSerialization.cpp Tue Mar  3 22:23:07 2009
@@ -414,7 +414,7 @@
 CaseStmt* CaseStmt::CreateImpl(Deserializer& D, ASTContext& C) {
   SourceLocation CaseLoc = SourceLocation::ReadVal(D);
   CaseStmt* stmt = new (C, llvm::alignof<CaseStmt>())
-                    CaseStmt(NULL,NULL,NULL,CaseLoc);
+                      CaseStmt(NULL,NULL,CaseLoc);
   D.ReadPtr(stmt->NextSwitchCase);
   D.BatchReadOwnedPtrs((unsigned) END_EXPR, &stmt->SubExprs[0], C);
   return stmt;

Modified: cfe/trunk/lib/Parse/ParseStmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Parse/ParseStmt.cpp?rev=66011&r1=66010&r2=66011&view=diff

==============================================================================
--- cfe/trunk/lib/Parse/ParseStmt.cpp (original)
+++ cfe/trunk/lib/Parse/ParseStmt.cpp Tue Mar  3 22:23:07 2009
@@ -225,54 +225,112 @@
 ///         'case' constant-expression ':' statement
 /// [GNU]   'case' constant-expression '...' constant-expression ':' statement
 ///
-/// Note that this does not parse the 'statement' at the end.
-///
 Parser::OwningStmtResult Parser::ParseCaseStatement() {
   assert(Tok.is(tok::kw_case) && "Not a case stmt!");
-  SourceLocation CaseLoc = ConsumeToken();  // eat the 'case'.
-
-  OwningExprResult LHS(ParseConstantExpression());
-  if (LHS.isInvalid()) {
-    SkipUntil(tok::colon);
-    return StmtError();
-  }
-
-  // GNU case range extension.
-  SourceLocation DotDotDotLoc;
-  OwningExprResult RHS(Actions);
-  if (Tok.is(tok::ellipsis)) {
-    Diag(Tok, diag::ext_gnu_case_range);
-    DotDotDotLoc = ConsumeToken();
-
-    RHS = ParseConstantExpression();
-    if (RHS.isInvalid()) {
+  
+  // It is very very common for code to contain many case statements recursively
+  // nested, as in (but usually without indentation):
+  //  case 1:
+  //    case 2:
+  //      case 3:
+  //         case 4:
+  //           case 5: etc.
+  //
+  // Parsing this naively works, but is both inefficient and can cause us to run
+  // out of stack space in our recursive descent parser.  As a special case,
+  // flatten this recursion into an interative loop.  This is complex and gross,
+  // but all the grossness is constrained to ParseCaseStatement (and some
+  // wierdness in the actions), so this is just local grossness :).
+  
+  // TopLevelCase - This is the highest level we have parsed.  'case 1' in the
+  // example above.
+  OwningStmtResult TopLevelCase(Actions, true);
+  
+  // DeepestParsedCaseStmt - This is the deepest statement we have parsed, which
+  // gets updated each time a new case is parsed, and whose body is unset so
+  // far.  When parsing 'case 4', this is the 'case 3' node.
+  StmtTy *DeepestParsedCaseStmt = 0;
+  
+  // While we have case statements, eat and stack them.
+  do {
+    SourceLocation CaseLoc = ConsumeToken();  // eat the 'case'.
+    
+    OwningExprResult LHS(ParseConstantExpression());
+    if (LHS.isInvalid()) {
       SkipUntil(tok::colon);
       return StmtError();
     }
-  }
 
-  if (Tok.isNot(tok::colon)) {
-    Diag(Tok, diag::err_expected_colon_after) << "'case'";
-    SkipUntil(tok::colon);
-    return StmtError();
-  }
+    // GNU case range extension.
+    SourceLocation DotDotDotLoc;
+    OwningExprResult RHS(Actions);
+    if (Tok.is(tok::ellipsis)) {
+      Diag(Tok, diag::ext_gnu_case_range);
+      DotDotDotLoc = ConsumeToken();
+
+      RHS = ParseConstantExpression();
+      if (RHS.isInvalid()) {
+        SkipUntil(tok::colon);
+        return StmtError();
+      }
+    }
 
-  SourceLocation ColonLoc = ConsumeToken();
+    if (Tok.isNot(tok::colon)) {
+      Diag(Tok, diag::err_expected_colon_after) << "'case'";
+      SkipUntil(tok::colon);
+      return StmtError();
+    }
 
-  // Diagnose the common error "switch (X) { case 4: }", which is not valid.
-  if (Tok.is(tok::r_brace)) {
+    SourceLocation ColonLoc = ConsumeToken();
+    
+    OwningStmtResult Case =
+      Actions.ActOnCaseStmt(CaseLoc, move(LHS), DotDotDotLoc,
+                            move(RHS), ColonLoc);
+    
+    // If we had a sema error parsing this case, then just ignore it and
+    // continue parsing the sub-stmt.
+    if (Case.isInvalid()) {
+      if (TopLevelCase.isInvalid())  // No parsed case stmts.
+        return ParseStatement();
+      // Otherwise, just don't add it as a nested case.
+    } else {
+      // If this is the first case statement we parsed, it becomes TopLevelCase.
+      // Otherwise we link it into the current chain.
+      StmtTy *NextDeepest = Case.get();
+      if (TopLevelCase.isInvalid())
+        TopLevelCase = move(Case);
+      else
+        Actions.ActOnCaseStmtBody(DeepestParsedCaseStmt, move(Case));
+      DeepestParsedCaseStmt = NextDeepest;
+    }
+    
+    // Handle all case statements.
+  } while (Tok.is(tok::kw_case));
+    
+  assert(!TopLevelCase.isInvalid() && "Should have parsed at least one case!");
+  
+  // If we found a non-case statement, start by parsing it.
+  OwningStmtResult SubStmt(Actions);
+  
+  if (Tok.isNot(tok::r_brace)) {
+    SubStmt = ParseStatement();
+  } else {
+    // Nicely diagnose the common error "switch (X) { case 4: }", which is
+    // not valid.
+    // FIXME: add insertion hint.
     Diag(Tok, diag::err_label_end_of_compound_statement);
-    return StmtError();
+    SubStmt = true;
   }
-
-  OwningStmtResult SubStmt(ParseStatement());
-
-  // Broken substmt shouldn't prevent the case from being added to the AST.
+  
+  // Broken sub-stmt shouldn't prevent forming the case statement properly.
   if (SubStmt.isInvalid())
-    SubStmt = Actions.ActOnNullStmt(ColonLoc);
+    SubStmt = Actions.ActOnNullStmt(SourceLocation());
+  
+  // Install the body into the most deeply-nested case.
+  Actions.ActOnCaseStmtBody(DeepestParsedCaseStmt, move(SubStmt));
 
-  return Actions.ActOnCaseStmt(CaseLoc, move(LHS), DotDotDotLoc,
-                               move(RHS), ColonLoc, move(SubStmt));
+  // Return the top level parsed statement tree.
+  return OwningStmtResult(Actions, TopLevelCase.release());
 }
 
 /// ParseDefaultStatement

Modified: cfe/trunk/lib/Sema/Sema.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/Sema.h?rev=66011&r1=66010&r2=66011&view=diff

==============================================================================
--- cfe/trunk/lib/Sema/Sema.h (original)
+++ cfe/trunk/lib/Sema/Sema.h Tue Mar  3 22:23:07 2009
@@ -995,7 +995,9 @@
                                          SourceLocation EndLoc);
   virtual OwningStmtResult ActOnCaseStmt(SourceLocation CaseLoc, ExprArg LHSVal,
                                     SourceLocation DotDotDotLoc, ExprArg RHSVal,
-                                    SourceLocation ColonLoc, StmtArg SubStmt);
+                                    SourceLocation ColonLoc);
+  virtual void ActOnCaseStmtBody(StmtTy *CaseStmt, StmtArg SubStmt);
+  
   virtual OwningStmtResult ActOnDefaultStmt(SourceLocation DefaultLoc,
                                             SourceLocation ColonLoc,
                                             StmtArg SubStmt, Scope *CurScope);

Modified: cfe/trunk/lib/Sema/SemaStmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaStmt.cpp?rev=66011&r1=66010&r2=66011&view=diff

==============================================================================
--- cfe/trunk/lib/Sema/SemaStmt.cpp (original)
+++ cfe/trunk/lib/Sema/SemaStmt.cpp Tue Mar  3 22:23:07 2009
@@ -61,10 +61,9 @@
     DeclGroupRef DG(*decls.begin());                      
     return Owned(new (Context) DeclStmt(DG, StartLoc, EndLoc));
   }
-  else {
-    DeclGroupRef DG(DeclGroup::Create(Context, decls.size(), &decls[0]));
-    return Owned(new (Context) DeclStmt(DG, StartLoc, EndLoc));
-  }
+
+  DeclGroupRef DG(DeclGroup::Create(Context, decls.size(), &decls[0]));
+  return Owned(new (Context) DeclStmt(DG, StartLoc, EndLoc));
 }
 
 Action::OwningStmtResult
@@ -114,16 +113,14 @@
 Action::OwningStmtResult
 Sema::ActOnCaseStmt(SourceLocation CaseLoc, ExprArg lhsval,
                     SourceLocation DotDotDotLoc, ExprArg rhsval,
-                    SourceLocation ColonLoc, StmtArg subStmt) {
-  Stmt *SubStmt = static_cast<Stmt*>(subStmt.release());
+                    SourceLocation ColonLoc) {
   assert((lhsval.get() != 0) && "missing expression in case statement");
 
   // C99 6.8.4.2p3: The expression shall be an integer constant.
   // However, GCC allows any evaluatable integer expression. 
-
   Expr *LHSVal = static_cast<Expr*>(lhsval.get());
   if (VerifyIntegerConstantExpression(LHSVal))
-    return Owned(SubStmt);
+    return StmtError();
 
   // GCC extension: The expression shall be an integer constant.
 
@@ -135,17 +132,24 @@
 
   if (SwitchStack.empty()) {
     Diag(CaseLoc, diag::err_case_not_in_switch);
-    return Owned(SubStmt);
+    return StmtError();
   }
 
   // Only now release the smart pointers.
   lhsval.release();
   rhsval.release();
-  CaseStmt *CS = new (Context) CaseStmt(LHSVal, RHSVal, SubStmt, CaseLoc);
+  CaseStmt *CS = new (Context) CaseStmt(LHSVal, RHSVal, CaseLoc);
   SwitchStack.back()->addSwitchCase(CS);
   return Owned(CS);
 }
 
+/// ActOnCaseStmtBody - This installs a statement as the body of a case.
+void Sema::ActOnCaseStmtBody(StmtTy *caseStmt, StmtArg subStmt) {
+  CaseStmt *CS = static_cast<CaseStmt*>(caseStmt);
+  Stmt *SubStmt = static_cast<Stmt*>(subStmt.release());
+  CS->setSubStmt(SubStmt);
+}
+
 Action::OwningStmtResult
 Sema::ActOnDefaultStmt(SourceLocation DefaultLoc, SourceLocation ColonLoc, 
                        StmtArg subStmt, Scope *CurScope) {





More information about the cfe-commits mailing list