[PATCH] D130523: [pseudo] Perform unconstrained recovery prior to completion.
Sam McCall via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 25 15:02:26 PDT 2022
sammccall updated this revision to Diff 447487.
sammccall added a comment.
restore removed debug
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D130523/new/
https://reviews.llvm.org/D130523
Files:
clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
clang-tools-extra/pseudo/lib/GLR.cpp
clang-tools-extra/pseudo/unittests/GLRTest.cpp
Index: clang-tools-extra/pseudo/unittests/GLRTest.cpp
===================================================================
--- clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -604,6 +604,33 @@
"[ 5, end) └─} := tok[5]\n");
}
+TEST_F(GLRTest, RecoverUnrestrictedReduce) {
+ // Here, ! is not in any rule and therefore not in the follow set of `word`.
+ // We would not normally reduce `word := IDENTIFIER`, but do so for recovery.
+
+ build(R"bnf(
+ _ := sentence
+
+ word := IDENTIFIER
+ sentence := word word [recover=AcceptAnyTokenInstead]
+ )bnf");
+
+ clang::LangOptions LOptions;
+ const TokenStream &Tokens = cook(lex("id !", LOptions), LOptions);
+ TestLang.Table = LRTable::buildSLR(TestLang.G);
+ TestLang.RecoveryStrategies.try_emplace(
+ extensionID("AcceptAnyTokenInstead"),
+ [](Token::Index Start, const TokenStream &Stream) { return Start + 1; });
+
+ const ForestNode &Parsed =
+ glrParse({Tokens, Arena, GSStack}, id("sentence"), TestLang);
+ EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
+ "[ 0, end) sentence := word word [recover=AcceptAnyTokenInstead]\n"
+ "[ 0, 1) ├─word := IDENTIFIER\n"
+ "[ 0, 1) │ └─IDENTIFIER := tok[0]\n"
+ "[ 1, end) └─word := <opaque>\n");
+}
+
TEST_F(GLRTest, NoExplicitAccept) {
build(R"bnf(
_ := test
Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -597,6 +597,10 @@
std::vector<const GSS::Node *> Heads = {GSS.addNode(/*State=*/StartState,
/*ForestNode=*/nullptr,
{})};
+ // Invariant: Heads is partitioned by source: {shifted | reduced}.
+ // HeadsPartition is the index of the first head formed by reduction.
+ // We use this to discard and recreate the reduced heads during recovery.
+ unsigned HeadsPartition = 0;
std::vector<const GSS::Node *> NextHeads;
auto MaybeGC = [&, Roots(std::vector<const GSS::Node *>{}), I(0u)]() mutable {
assert(NextHeads.empty() && "Running GC at the wrong time!");
@@ -619,8 +623,17 @@
// If we weren't able to consume the token, try to skip over some tokens
// so we can keep parsing.
if (NextHeads.empty()) {
- // FIXME: Heads may not be fully reduced, because our reductions were
- // constrained by lookahead (but lookahead is meaningless to recovery).
+ // The reduction in the previous round was constrained by lookahead.
+ // On valid code this only rejects dead ends, but on broken code we should
+ // consider all possibilities.
+ //
+ // We discard all heads formed by reduction, and recreate them without
+ // this constraint. This may duplicate some nodes, but it's rare.
+ LLVM_DEBUG(llvm::dbgs() << "Shift failed, will attempt recovery. "
+ "Re-reducing without lookahead.");
+ Heads.resize(HeadsPartition);
+ Reduce(Heads, /*allow all reductions*/ tokenSymbol(tok::unknown));
+
glrRecover(Heads, I, Params, Lang, NextHeads);
if (NextHeads.empty())
// FIXME: Ensure the `_ := start-symbol` rules have a fallback
@@ -632,6 +645,7 @@
// Form nonterminals containing the token we just consumed.
SymbolID Lookahead =
I == Terminals.size() ? tokenSymbol(tok::eof) : Terminals[I].symbol();
+ HeadsPartition = NextHeads.size();
Reduce(NextHeads, Lookahead);
// Prepare for the next token.
std::swap(Heads, NextHeads);
Index: clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
===================================================================
--- clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
+++ clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
@@ -105,7 +105,11 @@
bool canFollow(SymbolID Nonterminal, SymbolID Terminal) const {
assert(isToken(Terminal));
assert(isNonterminal(Nonterminal));
- return FollowSets.test(tok::NUM_TOKENS * Nonterminal +
+ // tok::unknown is a sentinel value used in recovery: can follow anything.
+ if (tok::unknown)
+ return true;
+ return Terminal == tokenSymbol(tok::unknown) ||
+ FollowSets.test(tok::NUM_TOKENS * Nonterminal +
symbolToToken(Terminal));
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D130523.447487.patch
Type: text/x-patch
Size: 4524 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20220725/316d9959/attachment.bin>
More information about the cfe-commits
mailing list