[clang-tools-extra] bd5cc65 - [pseudo] Start rules are `_ := start-symbol EOF`, improve recovery.

Fri Aug 19 07:50:14 PDT 2022

Author: Sam McCall
Date: 2022-08-19T16:49:37+02:00
New Revision: bd5cc6575bdbe1d091f1cc70fb7f0b3c07cb894b

URL: https://github.com/llvm/llvm-project/commit/bd5cc6575bdbe1d091f1cc70fb7f0b3c07cb894b
DIFF: https://github.com/llvm/llvm-project/commit/bd5cc6575bdbe1d091f1cc70fb7f0b3c07cb894b.diff

LOG: [pseudo] Start rules are `_ := start-symbol EOF`, improve recovery.

Previously we were calling glrRecover() ad-hoc at the end of input.
Two main problems with this:
 - glrRecover() on two separate code paths is inelegant
 - We may have to recover several times in succession (e.g. to exit from
   nested scopes), so we need a loop at end-of-file
Having an actual shift action for an EOF terminal allows us to handle
both concerns in the main shift/recover/reduce loop.

This revealed a recovery design bug where recovery could enter a loop by
repeatedly choosing the same parent to identically recover from.
Addressed this by allowing each node to be used as a recovery base once.

Differential Revision: https://reviews.llvm.org/D130550

Added: 
    

Modified: 
    clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
    clang-tools-extra/pseudo/lib/Forest.cpp
    clang-tools-extra/pseudo/lib/GLR.cpp
    clang-tools-extra/pseudo/lib/cxx/cxx.bnf
    clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
    clang-tools-extra/pseudo/test/lr-build-basic.test
    clang-tools-extra/pseudo/test/lr-build-conflicts.test
    clang-tools-extra/pseudo/unittests/ForestTest.cpp
    clang-tools-extra/pseudo/unittests/GLRTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
index 7b0a80920da98..f5257ce8d82f3 100644

--- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
@@ -71,6 +71,8 @@ struct GSS {
     LRTable::StateID State;
     // Used internally to track reachability during garbage collection.
     bool GCParity;
+    // Have we already used this node for error recovery? (prevents loops)
+    mutable bool Recovered = false;
     // Number of the parents of this node.
     // The parents hold previous parsed symbols, and may resume control after
     // this node is reduced.

diff  --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp
index bea93809c5b38..131dd4dca8393 100644
--- a/clang-tools-extra/pseudo/lib/Forest.cpp
+++ b/clang-tools-extra/pseudo/lib/Forest.cpp
@@ -178,7 +178,7 @@ std::string ForestNode::dumpRecursive(const Grammar &G,
 
 llvm::ArrayRef<ForestNode>
 ForestArena::createTerminals(const TokenStream &Code) {
-  ForestNode *Terminals = Arena.Allocate<ForestNode>(Code.tokens().size());
+  ForestNode *Terminals = Arena.Allocate<ForestNode>(Code.tokens().size() + 1);
   size_t Index = 0;
   for (const auto &T : Code.tokens()) {
     new (&Terminals[Index])
@@ -186,6 +186,12 @@ ForestArena::createTerminals(const TokenStream &Code) {
                    /*Start=*/Index, /*TerminalData*/ 0);
     ++Index;
   }
+  // Include an `eof` terminal.
+  // This is important to drive the final shift/recover/reduce loop.
+  new (&Terminals[Index])
+      ForestNode(ForestNode::Terminal, tokenSymbol(tok::eof),
+                 /*Start=*/Index, /*TerminalData*/ 0);
+  ++Index;
   NodeCount = Index;
   return llvm::makeArrayRef(Terminals, Index);
 }

diff  --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp
index 3e49a7a9c2691..8e4e6181eb2a6 100644
--- a/clang-tools-extra/pseudo/lib/GLR.cpp
+++ b/clang-tools-extra/pseudo/lib/GLR.cpp
@@ -95,17 +95,19 @@ void glrRecover(llvm::ArrayRef<const GSS::Node *> OldHeads,
   auto WalkUp = [&](const GSS::Node *N, Token::Index NextTok, auto &WalkUp) {
     if (!Seen.insert(N).second)
       return;
-    for (auto Strategy : Lang.Table.getRecovery(N->State)) {
-      Options.push_back(PlaceholderRecovery{
-          NextTok,
-          Strategy.Result,
-          Strategy.Strategy,
-          N,
-          Path,
-      });
-      LLVM_DEBUG(llvm::dbgs()
-                 << "Option: recover " << Lang.G.symbolName(Strategy.Result)
-                 << " at token " << NextTok << "\n");
+    if (!N->Recovered) { // Don't recover the same way twice!
+      for (auto Strategy : Lang.Table.getRecovery(N->State)) {
+        Options.push_back(PlaceholderRecovery{
+            NextTok,
+            Strategy.Result,
+            Strategy.Strategy,
+            N,
+            Path,
+        });
+        LLVM_DEBUG(llvm::dbgs()
+                   << "Option: recover " << Lang.G.symbolName(Strategy.Result)
+                   << " at token " << NextTok << "\n");
+      }
     }
     Path.push_back(N->Payload);
     for (const GSS::Node *Parent : N->parents())
@@ -180,6 +182,7 @@ void glrRecover(llvm::ArrayRef<const GSS::Node *> OldHeads,
   // There are various options, including simply breaking ties between options.
   // For now it's obscure enough to ignore.
   for (const PlaceholderRecovery *Option : BestOptions) {
+    Option->RecoveryNode->Recovered = true;
     const ForestNode &Placeholder =
         Params.Forest.createOpaque(Option->Symbol, RecoveryRange->Begin);
     LRTable::StateID OldState = Option->RecoveryNode->State;
@@ -587,6 +590,9 @@ class GLRReduce {
     auto NextState = Lang.Table.getGoToState(Base->State, Rule.Target);
     assert(NextState.has_value() && "goto must succeed after reduce!");
     Heads->push_back(Params.GSStack.addNode(*NextState, Parsed, {Base}));
+    LLVM_DEBUG(llvm::dbgs()
+               << "  Reduce (trivial) " << Lang.G.dumpRule(*RID) << "\n"
+               << "    --> S" << Heads->back()->State << "\n");
     return true;
   }
 };
@@ -638,7 +644,7 @@ const ForestNode &glrParse(const ParseParams &Params, SymbolID StartSymbol,
       // We discard all heads formed by reduction, and recreate them without
       // this constraint. This may duplicate some nodes, but it's rare.
       LLVM_DEBUG(llvm::dbgs() << "Shift failed, will attempt recovery. "
-                                 "Re-reducing without lookahead.");
+                                 "Re-reducing without lookahead.\n");
       Heads.resize(HeadsPartition);
       Reduce(Heads, /*allow all reductions*/ tokenSymbol(tok::unknown));
 
@@ -662,34 +668,26 @@ const ForestNode &glrParse(const ParseParams &Params, SymbolID StartSymbol,
   }
   LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Reached eof\n"));
 
-  // The parse was successful if we're in state `_ := start-symbol .`
-  auto AcceptState = Lang.Table.getGoToState(StartState, StartSymbol);
-  assert(AcceptState.has_value() && "goto must succeed after start symbol!");
+  // The parse was successful if in state `_ := start-symbol EOF .`
+  // The GSS parent has `_ := start-symbol . EOF`; its payload is the parse.
+  auto AfterStart = Lang.Table.getGoToState(StartState, StartSymbol);
+  assert(AfterStart.has_value() && "goto must succeed after start symbol!");
+  auto Accept = Lang.Table.getShiftState(*AfterStart, tokenSymbol(tok::eof));
+  assert(Accept.has_value() && "shift EOF must succeed!");
   auto SearchForAccept = [&](llvm::ArrayRef<const GSS::Node *> Heads) {
     const ForestNode *Result = nullptr;
     for (const auto *Head : Heads) {
-      if (Head->State == *AcceptState) {
-        assert(Head->Payload->symbol() == StartSymbol);
+      if (Head->State == *Accept) {
+        assert(Head->Payload->symbol() == tokenSymbol(tok::eof));
         assert(Result == nullptr && "multiple results!");
-        Result = Head->Payload;
+        Result = Head->parents().front()->Payload;
+        assert(Result->symbol() == StartSymbol);
       }
     }
     return Result;
   };
   if (auto *Result = SearchForAccept(Heads))
     return *Result;
-  // Failed to parse the input, attempt to run recovery.
-  // FIXME: this awkwardly repeats the recovery in the loop, when shift fails.
-  // More elegant is to include EOF in the token stream, and make the
-  // augmented rule: `_ := translation-unit EOF`. In this way recovery at EOF
-  // would not be a special case: it show up as a failure to shift the EOF
-  // token.
-  unsigned I = Terminals.size();
-  glrRecover(Heads, I, Params, Lang, NextHeads);
-  Reduce(NextHeads, tokenSymbol(tok::eof));
-  if (auto *Result = SearchForAccept(NextHeads))
-    return *Result;
-
   // We failed to parse the input, returning an opaque forest node for recovery.
   // FIXME: as above, we can add fallback error handling so this is impossible.
   return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0);
@@ -704,8 +702,10 @@ void glrReduce(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead,
 const GSS::Node *GSS::addNode(LRTable::StateID State, const ForestNode *Symbol,
 
                               llvm::ArrayRef<const Node *> Parents) {
-  Node *Result = new (allocate(Parents.size()))
-      Node({State, GCParity, static_cast<uint16_t>(Parents.size())});
+  Node *Result = new (allocate(Parents.size())) Node();
+  Result->State = State;
+  Result->GCParity = GCParity;
+  Result->ParentCount = Parents.size();
   Alive.push_back(Result);
   ++NodesCreated;
   Result->Payload = Symbol;

diff  --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
index 7221a5086acf5..80c1b54437c07 100644
--- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -29,9 +29,9 @@
 # We list important nonterminals as start symbols, rather than doing it for all
 # nonterminals by default, this reduces the number of states by 30% and LRTable
 # actions by 16%.
-_ := translation-unit
-_ := statement-seq
-_ := declaration-seq
+_ := translation-unit EOF
+_ := statement-seq EOF
+_ := declaration-seq EOF
 
 # gram.key
 #! we don't distinguish between namespaces and namespace aliases, as it's hard

diff  --git a/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp b/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
index 3aa665ef04d94..82c7cc7d8b293 100644
--- a/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
@@ -240,8 +240,9 @@ LRGraph LRGraph::buildLR0(const Grammar &G) {
     PendingStates.push_back(Result.first);
 
     const Rule &StartRule = G.lookupRule(RID);
-    assert(StartRule.Size == 1 &&
-           "Start rule must have exactly one symbol in its body!");
+    assert(StartRule.Size == 2 &&
+           StartRule.seq().back() == tokenSymbol(tok::eof) &&
+           "Start rule must be of the form `_ := start-symbol EOF`!");
     Builder.addStartState(StartRule.seq().front(), Result.first);
   }
 

diff  --git a/clang-tools-extra/pseudo/test/lr-build-basic.test b/clang-tools-extra/pseudo/test/lr-build-basic.test
index eba705623dac4..13036349eb8c1 100644
--- a/clang-tools-extra/pseudo/test/lr-build-basic.test
+++ b/clang-tools-extra/pseudo/test/lr-build-basic.test
@@ -1,19 +1,21 @@
-_ := expr
+_ := expr EOF
 expr := id
 id := IDENTIFIER
 
 # RUN: clang-pseudo -grammar %s -print-graph | FileCheck %s --check-prefix=GRAPH
 #      GRAPH: States:
 # GRAPH-NEXT: State 0
-# GRAPH-NEXT:     _ :=  • expr
+# GRAPH-NEXT:     _ :=  • expr EOF
 # GRAPH-NEXT:     expr :=  • id
 # GRAPH-NEXT:     id :=  • IDENTIFIER
 # GRAPH-NEXT: State 1
-# GRAPH-NEXT:     _ := expr • 
+# GRAPH-NEXT:     _ := expr • EOF
 # GRAPH-NEXT: State 2
 # GRAPH-NEXT:     expr := id • 
 # GRAPH-NEXT: State 3
 # GRAPH-NEXT:     id := IDENTIFIER • 
+# GRAPH-NEXT: State 4
+# GRAPH-NEXT:     _ := expr EOF • 
 
 # RUN: clang-pseudo -grammar %s -print-table | FileCheck %s --check-prefix=TABLE
 #      TABLE: LRTable:
@@ -22,7 +24,9 @@ id := IDENTIFIER
 # TABLE-NEXT:     expr: go to state 1
 # TABLE-NEXT:     id: go to state 2
 # TABLE-NEXT: State 1
+# TABLE-NEXT:     EOF: shift state 4
 # TABLE-NEXT: State 2
-# TABLE-NEXT:     EOF: reduce by rule 1 'expr := id'
+# TABLE-NEXT:     EOF: reduce by rule 2 'expr := id'
 # TABLE-NEXT: State 3
-# TABLE-NEXT:     EOF: reduce by rule 0 'id := IDENTIFIER'
+# TABLE-NEXT:     EOF: reduce by rule 1 'id := IDENTIFIER'
+# TABLE-NEXT: State 4

diff  --git a/clang-tools-extra/pseudo/test/lr-build-conflicts.test b/clang-tools-extra/pseudo/test/lr-build-conflicts.test
index e5149b865fd00..a66ce4d622ca1 100644
--- a/clang-tools-extra/pseudo/test/lr-build-conflicts.test
+++ b/clang-tools-extra/pseudo/test/lr-build-conflicts.test
@@ -1,31 +1,34 @@
-_ := expr
+_ := expr EOF
 expr := expr - expr  # S/R conflict at state 4 on '-' token
 expr := IDENTIFIER
 
 # RUN: clang-pseudo -grammar %s -print-graph | FileCheck %s --check-prefix=GRAPH
 #      GRAPH: States
 # GRAPH-NEXT: State 0
+# GRAPH-NEXT:     _ :=  • expr EOF
 # GRAPH-NEXT:     expr :=  • expr - expr
-# GRAPH-NEXT:     _ :=  • expr
 # GRAPH-NEXT:     expr :=  • IDENTIFIER
 # GRAPH-NEXT: State 1
-# GRAPH-NEXT:     _ := expr • 
+# GRAPH-NEXT:     _ := expr • EOF
 # GRAPH-NEXT:     expr := expr • - expr
 # GRAPH-NEXT: State 2
 # GRAPH-NEXT:     expr := IDENTIFIER • 
 # GRAPH-NEXT: State 3
+# GRAPH-NEXT:     _ := expr EOF •
+# GRAPH-NEXT: State 4
 # GRAPH-NEXT:     expr :=  • expr - expr
 # GRAPH-NEXT:     expr := expr - • expr
 # GRAPH-NEXT:     expr :=  • IDENTIFIER
-# GRAPH-NEXT: State 4
+# GRAPH-NEXT: State 5
 # GRAPH-NEXT:     expr := expr - expr • 
 # GRAPH-NEXT:     expr := expr • - expr
 # GRAPH-NEXT: 0 ->[expr] 1
 # GRAPH-NEXT: 0 ->[IDENTIFIER] 2
-# GRAPH-NEXT: 1 ->[-] 3
-# GRAPH-NEXT: 3 ->[expr] 4
-# GRAPH-NEXT: 3 ->[IDENTIFIER] 2
-# GRAPH-NEXT: 4 ->[-] 3
+# GRAPH-NEXT: 1 ->[EOF] 3
+# GRAPH-NEXT: 1 ->[-] 4
+# GRAPH-NEXT: 4 ->[expr] 5
+# GRAPH-NEXT: 4 ->[IDENTIFIER] 2
+# GRAPH-NEXT: 5 ->[-] 4
 
 # RUN: clang-pseudo -grammar %s -print-table | FileCheck %s --check-prefix=TABLE
 #      TABLE: LRTable:
@@ -33,12 +36,14 @@ expr := IDENTIFIER
 # TABLE-NEXT:     IDENTIFIER: shift state 2
 # TABLE-NEXT:     expr: go to state 1
 # TABLE-NEXT: State 1
-# TABLE-NEXT:     -: shift state 3
+# TABLE-NEXT:     EOF: shift state 3
+# TABLE-NEXT:     -: shift state 4
 # TABLE-NEXT: State 2
-# TABLE-NEXT:     EOF -: reduce by rule 1 'expr := IDENTIFIER'
+# TABLE-NEXT:     EOF -: reduce by rule 2 'expr := IDENTIFIER'
 # TABLE-NEXT: State 3
-# TABLE-NEXT:     IDENTIFIER: shift state 2
-# TABLE-NEXT:     expr: go to state 4
 # TABLE-NEXT: State 4
-# TABLE-NEXT:     -: shift state 3
-# TABLE-NEXT:     EOF -: reduce by rule 0 'expr := expr - expr'
+# TABLE-NEXT:     IDENTIFIER: shift state 2
+# TABLE-NEXT:     expr: go to state 5
+# TABLE-NEXT: State 5
+# TABLE-NEXT:     -: shift state 4
+# TABLE-NEXT:     EOF -: reduce by rule 1 'expr := expr - expr'

diff  --git a/clang-tools-extra/pseudo/unittests/ForestTest.cpp b/clang-tools-extra/pseudo/unittests/ForestTest.cpp
index b5a2b7e9a3364..36af896148209 100644
--- a/clang-tools-extra/pseudo/unittests/ForestTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/ForestTest.cpp
@@ -54,7 +54,7 @@ class ForestTest : public ::testing::Test {
 
 TEST_F(ForestTest, DumpBasic) {
   build(R"cpp(
-    _ := add-expression
+    _ := add-expression EOF
     add-expression := id-expression + id-expression
     id-expression := IDENTIFIER
   )cpp");
@@ -64,7 +64,7 @@ TEST_F(ForestTest, DumpBasic) {
       cook(lex("a + b", clang::LangOptions()), clang::LangOptions());
 
   auto T = Arena.createTerminals(TS);
-  ASSERT_EQ(T.size(), 3u);
+  ASSERT_EQ(T.size(), 4u);
   const auto *Left = &Arena.createSequence(
       symbol("id-expression"), ruleFor("id-expression"), {&T.front()});
   const auto *Right = &Arena.createSequence(symbol("id-expression"),
@@ -89,9 +89,9 @@ TEST_F(ForestTest, DumpBasic) {
 
 TEST_F(ForestTest, DumpAmbiguousAndRefs) {
   build(R"cpp(
-    _ := type
-    type := class-type # rule 3
-    type := enum-type # rule 4
+    _ := type EOF
+    type := class-type # rule 4
+    type := enum-type # rule 5
     class-type := shared-type
     enum-type := shared-type
     shared-type := IDENTIFIER)cpp");
@@ -100,7 +100,7 @@ TEST_F(ForestTest, DumpAmbiguousAndRefs) {
   const auto &TS = cook(lex("abc", clang::LangOptions()), clang::LangOptions());
 
   auto Terminals = Arena.createTerminals(TS);
-  ASSERT_EQ(Terminals.size(), 1u);
+  ASSERT_EQ(Terminals.size(), 2u);
 
   const auto *SharedType = &Arena.createSequence(
       symbol("shared-type"), ruleFor("shared-type"), {Terminals.begin()});
@@ -109,9 +109,9 @@ TEST_F(ForestTest, DumpAmbiguousAndRefs) {
   const auto *EnumType = &Arena.createSequence(
       symbol("enum-type"), ruleFor("enum-type"), {SharedType});
   const auto *Alternative1 =
-      &Arena.createSequence(symbol("type"), /*RuleID=*/3, {ClassType});
+      &Arena.createSequence(symbol("type"), /*RuleID=*/4, {ClassType});
   const auto *Alternative2 =
-      &Arena.createSequence(symbol("type"), /*RuleID=*/4, {EnumType});
+      &Arena.createSequence(symbol("type"), /*RuleID=*/5, {EnumType});
   const auto *Type =
       &Arena.createAmbiguous(symbol("type"), {Alternative1, Alternative2});
   EXPECT_EQ(Type->dumpRecursive(G),

diff  --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
index 397ad6d3e8000..761b2c8db4aac 100644
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -509,7 +509,7 @@ TEST_F(GLRTest, PerfectForestNodeSharing) {
   // item `expr := • IDENTIFIER`, and both have different goto states on the
   // nonterminal `expr`.
   build(R"bnf(
-    _ := test
+    _ := test EOF
 
     test := { expr
     test := { IDENTIFIER
@@ -548,7 +548,7 @@ TEST_F(GLRTest, GLRReduceOrder) {
   // foo should be reduced first, so that in step 2 we have completed reduces
   // for test, and form an ambiguous forest node.
   build(R"bnf(
-    _ := test
+    _ := test EOF
 
     test := IDENTIFIER
     test := foo
@@ -575,7 +575,7 @@ TEST_F(GLRTest, RecoveryEndToEnd) {
   //  - multiple possible recovery rules
   //  - recovery from outer scopes is rejected
   build(R"bnf(
-    _ := block
+    _ := block EOF
 
     block := { block [recover=Braces] }
     block := { numbers [recover=Braces] }
@@ -606,14 +606,14 @@ TEST_F(GLRTest, RecoveryEndToEnd) {
 
 TEST_F(GLRTest, RecoverTerminal) {
   build(R"bnf(
-    _ := stmt
+    _ := stmt EOF
 
     stmt := IDENTIFIER ; [recover=Skip]
   )bnf");
   TestLang.Table = LRTable::buildSLR(TestLang.G);
   TestLang.RecoveryStrategies.try_emplace(
       extensionID("Skip"),
-      [](Token::Index Start, const TokenStream &) { return Start + 1; });
+      [](Token::Index Start, const TokenStream &) { return Start; });
   clang::LangOptions LOptions;
   TokenStream Tokens = cook(lex("foo", LOptions), LOptions);
 
@@ -630,7 +630,7 @@ TEST_F(GLRTest, RecoverUnrestrictedReduce) {
   // We would not normally reduce `word := IDENTIFIER`, but do so for recovery.
 
   build(R"bnf(
-    _ := sentence
+    _ := sentence EOF
 
     word := IDENTIFIER
     sentence := word word [recover=AcceptAnyTokenInstead]
@@ -652,9 +652,40 @@ TEST_F(GLRTest, RecoverUnrestrictedReduce) {
             "[  1, end) └─word := <opaque>\n");
 }
 
+TEST_F(GLRTest, RepeatedRecovery) {
+  // We require multiple steps of recovery at eof and then a reduction in order
+  // to successfully parse.
+  build(R"bnf(
+    _ := function EOF
+    # FIXME: this forces EOF to be in follow(signature).
+    # Remove it once we use unconstrained reduction for recovery.
+    _ := signature EOF
+
+    function := signature body [recover=Skip]
+    signature := IDENTIFIER params [recover=Skip]
+    params := ( )
+    body := { }
+  )bnf");
+  TestLang.Table = LRTable::buildSLR(TestLang.G);
+  TestLang.RecoveryStrategies.try_emplace(
+      extensionID("Skip"),
+      [](Token::Index Start, const TokenStream &) { return Start; });
+  clang::LangOptions LOptions;
+  TokenStream Tokens = cook(lex("main", LOptions), LOptions);
+
+  const ForestNode &Parsed =
+      glrParse({Tokens, Arena, GSStack}, id("function"), TestLang);
+  EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
+            "[  0, end) function := signature body [recover=Skip]\n"
+            "[  0,   1) ├─signature := IDENTIFIER params [recover=Skip]\n"
+            "[  0,   1) │ ├─IDENTIFIER := tok[0]\n"
+            "[  1,   1) │ └─params := <opaque>\n"
+            "[  1, end) └─body := <opaque>\n");
+}
+
 TEST_F(GLRTest, NoExplicitAccept) {
   build(R"bnf(
-    _ := test
+    _ := test EOF
 
     test := IDENTIFIER test
     test := IDENTIFIER
@@ -677,7 +708,7 @@ TEST_F(GLRTest, NoExplicitAccept) {
 
 TEST_F(GLRTest, GuardExtension) {
   build(R"bnf(
-    _ := start
+    _ := start EOF
 
     start := IDENTIFIER [guard]
   )bnf");