[clang-tools-extra] 743971f - Revert "[pseudo] Add error-recovery framework & brace-based recovery"
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 28 12:11:15 PDT 2022
Author: Sam McCall
Date: 2022-06-28T21:11:09+02:00
New Revision: 743971faaf84eaa0e6310bdc22c68f34c20330f1
URL: https://github.com/llvm/llvm-project/commit/743971faaf84eaa0e6310bdc22c68f34c20330f1
DIFF: https://github.com/llvm/llvm-project/commit/743971faaf84eaa0e6310bdc22c68f34c20330f1.diff
LOG: Revert "[pseudo] Add error-recovery framework & brace-based recovery"
This reverts commit a0f4c10ae227a62c2a63611e64eba83f0ff0f577.
This commit hadn't been reviewed yet, and was unintentionally included
on another branch.
Added:
Modified:
clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
clang-tools-extra/pseudo/lib/GLR.cpp
clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
clang-tools-extra/pseudo/unittests/GLRTest.cpp
Removed:
clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp
################################################################################
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
index edc4b2acc0a8..a3e8611de425 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
@@ -144,26 +144,6 @@ void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads,
void glrReduce(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead,
const ParseParams &Params);
-// Heuristically recover from a state where no further parsing is possible.
-//
-// OldHeads is the parse state at TokenIndex.
-// This function consumes zero or more tokens (advancing TokenIndex),
-// and places any recovery states created in NewHeads.
-//
-// On failure, NewHeads is empty and TokenIndex is unchanged.
-//
-// WARNING: glrRecover acts as a "fallback shift". If it consumes no tokens,
-// there is a risk of the parser falling into an infinite loop, creating an
-// endless sequence of recovery nodes.
-// Generally it is safe for recovery to match 0 tokens against sequence symbols
-// like `statement-seq`, as the grammar won't permit another statement-seq
-// immediately afterwards. However recovery strategies for `statement` should
-// consume at least one token, as statements may be adjacent in the input.
-void glrRecover(llvm::ArrayRef<const GSS::Node *> OldHeads,
- unsigned &TokenIndex, const TokenStream &Tokens,
- const ParseParams &Params,
- std::vector<const GSS::Node *> &NewHeads);
-
} // namespace pseudo
} // namespace clang
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
index 7240f5adbb03..382da41397f0 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
@@ -81,12 +81,9 @@ inline tok::TokenKind symbolToToken(SymbolID SID) {
assert(SID < NumTerminals);
return static_cast<tok::TokenKind>(SID);
}
-inline constexpr SymbolID tokenSymbol(tok::TokenKind TK) {
+inline SymbolID tokenSymbol(tok::TokenKind TK) {
return TokenFlag | static_cast<SymbolID>(TK);
}
-// Error recovery strategies.
-// FIXME: these should be provided as extensions instead.
-enum class RecoveryStrategy : uint8_t { None, Braces };
// An extension is a piece of native code specific to a grammar that modifies
// the behavior of annotated rules. One ExtensionID is assigned for each unique
@@ -110,7 +107,7 @@ struct Rule {
// length to 9 (this is the longest sequence in cxx grammar).
static constexpr unsigned SizeBits = 4;
static constexpr unsigned MaxElements = 9;
- static_assert(MaxElements < (1 << SizeBits), "Exceeds the maximum limit");
+ static_assert(MaxElements <= (1 << SizeBits), "Exceeds the maximum limit");
static_assert(SizeBits + SymbolBits <= 16,
"Must be able to store symbol ID + size efficiently");
@@ -126,13 +123,6 @@ struct Rule {
// being set for this rule.
ExtensionID Guard = 0;
- // Specifies the index within Sequence eligible for error recovery.
- // Given stmt := { stmt-seq_opt }, if we fail to parse the stmt-seq then we
- // should recover by finding the matching brace, and forcing stmt-seq to match
- // everything between braces.
- uint8_t RecoveryIndex = -1;
- RecoveryStrategy Recovery = RecoveryStrategy::None;
-
llvm::ArrayRef<SymbolID> seq() const {
return llvm::ArrayRef<SymbolID>(Sequence, Size);
}
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
index f5c2cc7a1623..1b5365327431 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRGraph.h
@@ -137,20 +137,8 @@ class LRGraph {
SymbolID Label;
};
- // A possible error recovery: choose to match some tokens against a symbol.
- //
- // e.g. a state that contains
- // stmt := { . stmt-seq [recover=braces] }
- // has a Recovery { Src = S, Strategy=braces, Result=stmt-seq }.
- struct Recovery {
- StateID Src; // The state we are in when encountering the error.
- RecoveryStrategy Strategy; // Heuristic choosing the tokens to match.
- SymbolID Result; // The symbol that is produced.
- };
-
llvm::ArrayRef<State> states() const { return States; }
llvm::ArrayRef<Edge> edges() const { return Edges; }
- llvm::ArrayRef<Recovery> recoveries() const { return Recoveries; }
llvm::ArrayRef<std::pair<SymbolID, StateID>> startStates() const {
return StartStates;
}
@@ -159,15 +147,12 @@ class LRGraph {
private:
LRGraph(std::vector<State> States, std::vector<Edge> Edges,
- std::vector<Recovery> Recoveries,
std::vector<std::pair<SymbolID, StateID>> StartStates)
: States(std::move(States)), Edges(std::move(Edges)),
- Recoveries(std::move(Recoveries)), StartStates(std::move(StartStates)) {
- }
+ StartStates(std::move(StartStates)) {}
std::vector<State> States;
std::vector<Edge> Edges;
- std::vector<Recovery> Recoveries;
std::vector<std::pair<SymbolID, StateID>> StartStates;
};
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
index c8e48dbaf309..70ce52924f11 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h
@@ -121,14 +121,6 @@ class LRTable {
uint16_t Value : ValueBits;
};
- struct Recovery {
- RecoveryStrategy Strategy;
- SymbolID Result;
- };
-
- // Returns all available actions for the given state on a terminal.
- // Expected to be called by LR parsers.
- llvm::ArrayRef<Action> getActions(StateID State, SymbolID Terminal) const;
// Returns the state after we reduce a nonterminal.
// Expected to be called by LR parsers.
// REQUIRES: Nonterminal is valid here.
@@ -159,12 +151,6 @@ class LRTable {
symbolToToken(Terminal));
}
- // Looks up available recovery actions if we stopped parsing in this state.
- llvm::ArrayRef<Recovery> getRecovery(StateID State) const {
- return llvm::makeArrayRef(Recoveries.data() + RecoveryOffset[State],
- Recoveries.data() + RecoveryOffset[State + 1]);
- }
-
// Returns the state from which the LR parser should start to parse the input
// tokens as the given StartSymbol.
//
@@ -202,15 +188,9 @@ class LRTable {
StateID State;
RuleID Rule;
};
- struct RecoveryEntry {
- StateID State;
- RecoveryStrategy Strategy;
- SymbolID Result;
- };
- // Build a specified table for testing purposes.
- static LRTable buildForTests(const Grammar &, llvm::ArrayRef<Entry>,
- llvm::ArrayRef<ReduceEntry>,
- llvm::ArrayRef<RecoveryEntry> = {});
+ // Build a specifid table for testing purposes.
+ static LRTable buildForTests(const Grammar &G, llvm::ArrayRef<Entry>,
+ llvm::ArrayRef<ReduceEntry>);
private:
// Looks up actions stored in the generic table.
@@ -242,11 +222,6 @@ class LRTable {
// This is flattened by encoding the (SymbolID Nonterminal, tok::Kind Token)
// as an index: Nonterminal * NUM_TOKENS + Token.
llvm::BitVector FollowSets;
-
- // Recovery stores all recovery actions from all states.
- // A given state has [RecoveryOffset[S], RecoveryOffset[S+1]).
- std::vector<uint32_t> RecoveryOffset;
- std::vector<Recovery> Recoveries;
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &);
diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp
index ccb3cd770f7f..0b9cf46245a9 100644
--- a/clang-tools-extra/pseudo/lib/GLR.cpp
+++ b/clang-tools-extra/pseudo/lib/GLR.cpp
@@ -24,156 +24,6 @@
namespace clang {
namespace pseudo {
-namespace {
-
-llvm::Optional<unsigned>
-findRecoveryEndpoint(RecoveryStrategy Strategy,
- const GSS::Node *RecoveryNode,
- const TokenStream &Tokens) {
- assert(Strategy == RecoveryStrategy::Braces);
- const ForestNode *LBrace = RecoveryNode->Payload;
- assert(LBrace->kind() == ForestNode::Terminal &&
- LBrace->symbol() == tokenSymbol(tok::l_brace));
- if (const Token *RBrace = Tokens.tokens()[LBrace->startTokenIndex()].pair())
- return Tokens.index(*RBrace);
- return llvm::None;
-}
-
-} // namespace
-
-void glrRecover(llvm::ArrayRef<const GSS::Node *> OldHeads,
- unsigned &TokenIndex, const TokenStream &Tokens,
- const ParseParams &Params,
- std::vector<const GSS::Node *> &NewHeads) {
- LLVM_DEBUG(llvm::dbgs() << "Recovery at token " << TokenIndex << "...\n");
- // Describes a possibility to recover by forcibly interpreting a range of
- // tokens around the cursor as a nonterminal that we expected to see.
- struct PlaceholderRecovery {
- // The token prior to the nonterminal which is being recovered.
- // This starts of the region we're skipping, so higher Position is better.
- // This is the start of the region we're skipping, so higher Position is better.
- // The nonterminal which will be created in order to recover.
- SymbolID Symbol;
- // The heuristic used to choose the bounds of the nonterminal to recover.
- RecoveryStrategy Strategy;
-
- // The GSS head where we are expecting the recovered nonterminal.
- const GSS::Node *RecoveryNode;
- // Payload of nodes on the way back from the OldHead to the recovery node.
- // These represent the partial parse that is being discarded.
- // They should become the children of the opaque recovery node.
- //
- // There may be multiple paths leading to the same recovery node, we choose
- // one arbitrarily.
- std::vector<const ForestNode *> Path;
- };
- std::vector<PlaceholderRecovery> Options;
-
- // Find recovery options by walking up the stack.
- //
- // This is similar to exception handling: we walk up the "frames" of nested
- // rules being parsed until we find one that has a "handler" which allows us
- // to determine the node bounds without parsing it.
- //
- // Unfortunately there's a significant difference: the stack contains both
- // "upward" nodes (ancestor parses) and "leftward" ones.
- // e.g. when parsing `int(2 + ?)`, the stack contains:
- // expr := expr + . expr - which we're currently parsing
- // expr := type ( . expr ) - (up) we should recover this outer expr
- // expr := . type ( expr ) - (up+left) we should not recover this type!
- //
- // It's not obvious how to avoid collecting the latter as a recovery option.
- // I think the distinction is ill-defined after merging items into states.
- // For now, we have to take this into account when defining recovery rules.
- // FIXME: find a more satisfying way to avoid such false recovery.
- std::vector<const ForestNode *> Path;
- llvm::DenseSet<const GSS::Node *> Seen;
- auto DFS = [&](const GSS::Node *N, Token::Index NextTok, auto &DFS) {
- if (!Seen.insert(N).second)
- return;
- for (auto Strategy : Params.Table.getRecovery(N->State)) {
- Options.push_back(PlaceholderRecovery{
- NextTok,
- Strategy.Result,
- Strategy.Strategy,
- N,
- Path,
- });
- LLVM_DEBUG(llvm::dbgs()
- << "Option: recover " << Params.G.symbolName(Strategy.Result)
- << " at token " << NextTok << "\n");
- }
- Path.push_back(N->Payload);
- for (const GSS::Node *Parent : N->parents())
- DFS(Parent, N->Payload->startTokenIndex(), DFS);
- Path.pop_back();
- };
- for (auto *N : llvm::reverse(OldHeads))
- DFS(N, TokenIndex, DFS);
-
- // Now we select the option(s) we will use to recover.
- //
- // We prefer options starting further right, as these discard less code
- // (e.g. we prefer to recover inner scopes rather than outer ones).
- // The options also need to agree on an endpoint, so the parser has a
- // consistent position afterwards.
- //
- // So conceptually we're sorting by the tuple (start, end), though we avoid
- // computing `end` for options that can't be winners.
-
- // Consider options starting further right first.
- // Don't drop the others yet though, we may still use them if preferred fails.
- llvm::stable_sort(Options, [&](const auto &L, const auto &R) {
- return L.Position > R.Position;
- });
-
- assert(NewHeads.empty()); // We may repeatedly populate and clear it.
- llvm::Optional<Token::Range> RecoveryRange;
- for (const PlaceholderRecovery &Option : Options) {
- // If this starts further left than options we've already found, then
- // we'll never find anything better. Skip computing End for the rest.
- if (RecoveryRange && Option.Position < RecoveryRange->Begin)
- break;
-
- auto End =
- findRecoveryEndpoint(Option.Strategy, Option.RecoveryNode, Tokens);
- // Only consider recovery that advances the parse.
- if (!End || *End <= TokenIndex)
- continue;
- if (RecoveryRange) {
- // If this is worse than our previous options, ignore it.
- if (RecoveryRange->End < *End)
- continue;
- // If this is an improvement over our previous options, then drop them.
- if (RecoveryRange->End > *End)
- NewHeads.clear();
- }
- // Create recovery nodes and heads for them in the GSS. These may be
- // discarded if a better recovery is later found, but this path isn't hot.
- RecoveryRange = {Option.Position, *End};
- const ForestNode &Placeholder =
- Params.Forest.createOpaque(Option.Symbol, Option.Position);
- const GSS::Node *NewHead = Params.GSStack.addNode(
- Params.Table.getGoToState(Option.RecoveryNode->State, Option.Symbol),
- &Placeholder, {Option.RecoveryNode});
- NewHeads.push_back(NewHead);
- }
-
- // Advance the cursor, whether recovery succeeded or not.
- if (RecoveryRange) {
- LLVM_DEBUG({
- llvm::dbgs() << "Recovered range=" << *RecoveryRange << ":";
- for (const auto *Head : NewHeads)
- llvm::dbgs() << " " << Params.G.symbolName(Head->Payload->symbol());
- llvm::dbgs() << "\n";
- });
- TokenIndex = RecoveryRange->End;
- } else {
- LLVM_DEBUG(llvm::dbgs() << "Recovery failed after trying " << Options.size()
- << " strategies\n");
- ++TokenIndex;
- }
-}
using StateID = LRTable::StateID;
@@ -181,9 +31,8 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GSS::Node &N) {
std::vector<std::string> ParentStates;
for (const auto *Parent : N.parents())
ParentStates.push_back(llvm::formatv("{0}", Parent->State));
- OS << llvm::formatv("state {0}, parsed symbol {1}, parents {3}", N.State,
- N.Payload ? N.Payload->symbol() : 0,
- llvm::join(ParentStates, ", "));
+ OS << llvm::formatv("state {0}, parsed symbol {1}, parents {2}", N.State,
+ N.Payload->symbol(), llvm::join(ParentStates, ", "));
return OS;
}
@@ -578,27 +427,15 @@ const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
GSS.gc(std::move(Roots));
};
// Each iteration fully processes a single token.
- for (unsigned I = 0; I < Terminals.size();) {
+ for (unsigned I = 0; I < Terminals.size(); ++I) {
LLVM_DEBUG(llvm::dbgs() << llvm::formatv(
"Next token {0} (id={1})\n",
G.symbolName(Terminals[I].symbol()), Terminals[I].symbol()));
// Consume the token.
glrShift(Heads, Terminals[I], Params, NextHeads);
-
- // If we weren't able to consume the token, try to skip over some tokens
- // so we can keep parsing.
- if (NextHeads.empty()) {
- glrRecover(Heads, I, Tokens, Params, NextHeads);
- if (NextHeads.empty())
- // FIXME: Ensure the `_ := start-symbol` rules have a fallback
- // error-recovery strategy attached. Then this condition can't happen.
- return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0);
- } else
- ++I;
-
// Form nonterminals containing the token we just consumed.
- SymbolID Lookahead =
- I == Terminals.size() ? tokenSymbol(tok::eof) : Terminals[I].symbol();
+ SymbolID Lookahead = I + 1 == Terminals.size() ? tokenSymbol(tok::eof)
+ : Terminals[I + 1].symbol();
Reduce(NextHeads, Lookahead);
// Prepare for the next token.
std::swap(Heads, NextHeads);
@@ -607,35 +444,22 @@ const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
}
LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Reached eof\n"));
- // The parse was successful if we're in state `_ := start-symbol .`
StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);
- auto SearchForAccept = [&](llvm::ArrayRef<const GSS::Node *> Heads) {
- const ForestNode *Result = nullptr;
- for (const auto *Head : Heads) {
- if (Head->State == AcceptState) {
- assert(Head->Payload->symbol() == StartSymbol);
- assert(Result == nullptr && "multiple results!");
- Result = Head->Payload;
- }
+ const ForestNode *Result = nullptr;
+ for (const auto *Head : Heads) {
+ if (Head->State == AcceptState) {
+ assert(Head->Payload->symbol() == StartSymbol);
+ assert(Result == nullptr && "multiple results!");
+ Result = Head->Payload;
}
- return Result;
- };
- if (auto *Result = SearchForAccept(Heads))
- return *Result;
- // Failed to parse the input, attempt to run recovery.
- // FIXME: this awkwardly repeats the recovery in the loop, when shift fails.
- // More elegant is to include EOF in the token stream, and make the
- // augmented rule: `_ := translation-unit EOF`. In this way recovery at EOF
- // would not be a special case: it would show up as a failure to shift the EOF
- // token.
- unsigned I = Terminals.size();
- glrRecover(Heads, I, Tokens, Params, NextHeads);
- Reduce(NextHeads, tokenSymbol(tok::eof));
- if (auto *Result = SearchForAccept(NextHeads))
+ }
+ if (Result)
return *Result;
-
// We failed to parse the input, returning an opaque forest node for recovery.
- // FIXME: as above, we can add fallback error handling so this is impossible.
+ //
+ // FIXME: We will need to invoke our generic error-recovery handlers when we
+ // reach EOF without reaching accept state, and involving the eof
+ // token in the above main for-loop may be the best way to reuse the code.
return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0);
}
@@ -646,10 +470,9 @@ void glrReduce(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead,
}
const GSS::Node *GSS::addNode(LRTable::StateID State, const ForestNode *Symbol,
-
llvm::ArrayRef<const Node *> Parents) {
Node *Result = new (allocate(Parents.size()))
- Node({State, GCParity, static_cast<uint16_t>(Parents.size())});
+ Node({State, GCParity, static_cast<unsigned>(Parents.size())});
Alive.push_back(Result);
++NodesCreated;
Result->Payload = Symbol;
diff --git a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
index dc4d95820264..8e3bcb7afb37 100644
--- a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp
@@ -59,11 +59,8 @@ std::string Grammar::dumpRule(RuleID RID) const {
llvm::raw_string_ostream OS(Result);
const Rule &R = T->Rules[RID];
OS << symbolName(R.Target) << " :=";
- for (unsigned I = 0; I < R.Size; ++I) {
- OS << " " << symbolName(R.Sequence[I]);
- if (R.RecoveryIndex == I)
- OS << " [recover=" << static_cast<unsigned>(R.Recovery) << "]";
- }
+ for (SymbolID SID : R.seq())
+ OS << " " << symbolName(SID);
if (R.Guard)
OS << " [guard=" << T->AttributeValues[R.Guard] << "]";
return Result;
diff --git a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
index 281c08681c92..9fbc34da7155 100644
--- a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp
@@ -106,17 +106,6 @@ class GrammarBuilder {
assert(T->Rules.size() < (1 << RuleBits) &&
"Too many rules to fit in RuleID bits!");
- // Wherever RHS contains { foo }, mark foo for brace-recovery.
- // FIXME: this should be grammar annotations instead.
- for (auto &Rule : T->Rules) {
- for (unsigned I = 2; I < Rule.Size; ++I)
- if (Rule.Sequence[I] == tokenSymbol(tok::r_brace) &&
- Rule.Sequence[I - 2] == tokenSymbol(tok::l_brace) &&
- !isToken(Rule.Sequence[I - 1])) {
- Rule.Recovery = RecoveryStrategy::Braces;
- Rule.RecoveryIndex = I - 1;
- }
- }
const auto &SymbolOrder = getTopologicalOrder(T.get());
llvm::stable_sort(
T->Rules, [&SymbolOrder](const Rule &Left, const Rule &Right) {
diff --git a/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp b/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
index 290a4f32f576..5312a675cdb8 100644
--- a/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp
@@ -120,20 +120,6 @@ nextAvailableKernelItems(const State &S, const Grammar &G) {
return Results;
}
-std::vector<std::pair<RecoveryStrategy, SymbolID>>
-availableRecovery(const State &S, const Grammar &G) {
- std::vector<std::pair<RecoveryStrategy, SymbolID>> Result;
- for (const Item &I : S.Items) {
- const auto &Rule = G.lookupRule(I.rule());
- if (I.dot() != Rule.RecoveryIndex)
- continue;
- Result.push_back({Rule.Recovery, Rule.seq()[Rule.RecoveryIndex]});
- }
- llvm::sort(Result);
- Result.erase(std::unique(Result.begin(), Result.end()), Result.end());
- return Result;
-}
-
} // namespace
std::string Item::dump(const Grammar &G) const {
@@ -144,10 +130,9 @@ std::string Item::dump(const Grammar &G) const {
Results.push_back(G.symbolName(SID));
return Results;
};
- return llvm::formatv("{0} := {1} • {2}{3}", G.symbolName(Rule.Target),
+ return llvm::formatv("{0} := {1} • {2}", G.symbolName(Rule.Target),
llvm::join(ToNames(Rule.seq().take_front(DotPos)), " "),
- llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " "),
- Rule.RecoveryIndex == DotPos ? " [recovery]" : "")
+ llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " "))
.str();
}
@@ -196,11 +181,6 @@ LRGraph LRGraph::buildLR0(const Grammar &G) {
Edges.push_back({Src, Dst, Label});
}
- void insertRecovery(StateID Src, RecoveryStrategy Strategy,
- SymbolID Result) {
- Recoveries.push_back({Src, Strategy, Result});
- }
-
// Returns a state with the given id.
const State &find(StateID ID) const {
assert(ID < States.size());
@@ -214,10 +194,9 @@ LRGraph LRGraph::buildLR0(const Grammar &G) {
LRGraph build() && {
States.shrink_to_fit();
Edges.shrink_to_fit();
- Recoveries.shrink_to_fit();
llvm::sort(StartStates);
StartStates.shrink_to_fit();
- return LRGraph(std::move(States), std::move(Edges), std::move(Recoveries),
+ return LRGraph(std::move(States), std::move(Edges),
std::move(StartStates));
}
@@ -226,7 +205,6 @@ LRGraph LRGraph::buildLR0(const Grammar &G) {
llvm::DenseMap<ItemSet, /*index of States*/ size_t> StatesIndex;
std::vector<State> States;
std::vector<Edge> Edges;
- std::vector<Recovery> Recoveries;
const Grammar &G;
std::vector<std::pair<SymbolID, StateID>> StartStates;
} Builder(G);
@@ -247,16 +225,15 @@ LRGraph LRGraph::buildLR0(const Grammar &G) {
}
while (!PendingStates.empty()) {
- auto StateID = PendingStates.back();
+ auto CurrentStateID = PendingStates.back();
PendingStates.pop_back();
- for (auto Next : nextAvailableKernelItems(Builder.find(StateID), G)) {
+ for (auto Next :
+ nextAvailableKernelItems(Builder.find(CurrentStateID), G)) {
auto Insert = Builder.insert(Next.second);
if (Insert.second) // new state, insert to the pending queue.
PendingStates.push_back(Insert.first);
- Builder.insertEdge(StateID, Insert.first, Next.first);
+ Builder.insertEdge(CurrentStateID, Insert.first, Next.first);
}
- for (auto Recovery : availableRecovery(Builder.find(StateID), G))
- Builder.insertRecovery(StateID, Recovery.first, Recovery.second);
}
return std::move(Builder).build();
}
diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
index 3d36042044ad..59ea4ce5e327 100644
--- a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
@@ -11,7 +11,6 @@
#include "clang-pseudo/grammar/LRTable.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/Support/raw_ostream.h"
#include <cstdint>
namespace llvm {
@@ -45,7 +44,6 @@ struct LRTable::Builder {
llvm::DenseSet<Entry> Entries;
llvm::DenseMap<StateID, llvm::SmallSet<RuleID, 4>> Reduces;
std::vector<llvm::DenseSet<SymbolID>> FollowSets;
- std::vector<LRGraph::Recovery> Recoveries;
LRTable build(unsigned NumStates) && {
// E.g. given the following parsing table with 3 states and 3 terminals:
@@ -90,26 +88,6 @@ struct LRTable::Builder {
}
Table.StartStates = std::move(StartStates);
- // Error recovery entries: sort (no dups already), and build offset lookup.
- llvm::sort(Recoveries,
- [&](const LRGraph::Recovery &L, const LRGraph::Recovery &R) {
- return std::tie(L.Src, L.Result, L.Strategy) <
- std::tie(R.Src, R.Result, R.Strategy);
- });
- Table.Recoveries.reserve(Recoveries.size());
- for (const auto &R : Recoveries)
- Table.Recoveries.push_back({R.Strategy, R.Result});
- Table.RecoveryOffset = std::vector<uint32_t>(NumStates + 1, 0);
- SortedIndex = 0;
- for (StateID State = 0; State < NumStates; ++State) {
- Table.RecoveryOffset[State] = SortedIndex;
- while (SortedIndex < Recoveries.size() &&
- Recoveries[SortedIndex].Src == State)
- SortedIndex++;
- }
- Table.RecoveryOffset[NumStates] = SortedIndex;
- assert(SortedIndex == Recoveries.size());
-
// Compile the follow sets into a bitmap.
Table.FollowSets.resize(tok::NUM_TOKENS * FollowSets.size());
for (SymbolID NT = 0; NT < FollowSets.size(); ++NT)
@@ -136,8 +114,7 @@ struct LRTable::Builder {
};
LRTable LRTable::buildForTests(const Grammar &G, llvm::ArrayRef<Entry> Entries,
- llvm::ArrayRef<ReduceEntry> Reduces,
- llvm::ArrayRef<RecoveryEntry> Recoveries) {
+ llvm::ArrayRef<ReduceEntry> Reduces) {
StateID MaxState = 0;
for (const auto &Entry : Entries) {
MaxState = std::max(MaxState, Entry.State);
@@ -151,8 +128,6 @@ LRTable LRTable::buildForTests(const Grammar &G, llvm::ArrayRef<Entry> Entries,
for (const ReduceEntry &E : Reduces)
Build.Reduces[E.State].insert(E.Rule);
Build.FollowSets = followSets(G);
- for (const auto &R : Recoveries)
- Build.Recoveries.push_back({R.State, R.Strategy, R.Result});
return std::move(Build).build(/*NumStates=*/MaxState + 1);
}
@@ -160,7 +135,6 @@ LRTable LRTable::buildSLR(const Grammar &G) {
auto Graph = LRGraph::buildLR0(G);
Builder Build;
Build.StartStates = Graph.startStates();
- Build.Recoveries = Graph.recoveries();
for (const auto &T : Graph.edges()) {
Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);
Build.Entries.insert({T.Src, T.Label, Act});
diff --git a/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp b/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
index bcfdff192b99..6d7a6823d0bf 100644
--- a/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
+++ b/clang-tools-extra/pseudo/test/cxx/empty-member-spec.cpp
@@ -2,7 +2,7 @@
class Foo {
public:
};
-// CHECK: decl-specifier-seq~class-specifier := class-head { member-specification [recover=1] }
+// CHECK: decl-specifier-seq~class-specifier := class-head { member-specification }
// CHECK-NEXT: ├─class-head := class-key class-head-name
// CHECK-NEXT: │ ├─class-key~CLASS := tok[0]
// CHECK-NEXT: │ └─class-head-name~IDENTIFIER := tok[1]
diff --git a/clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp b/clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp
deleted file mode 100644
index a8bbe88dadd8..000000000000
--- a/clang-tools-extra/pseudo/test/cxx/recovery-init-list.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-// RUN: clang-pseudo -grammar=%cxx-bnf-file -source=%s --print-forest | FileCheck %s
-auto x = { complete garbage };
-// CHECK: translation-unit~simple-declaration
-// CHECK-NEXT: ├─decl-specifier-seq~AUTO := tok[0]
-// CHECK-NEXT: ├─init-declarator-list~init-declarator
-// CHECK-NEXT: │ ├─declarator~IDENTIFIER := tok[1]
-// CHECK-NEXT: │ └─initializer~brace-or-equal-initializer
-// CHECK-NEXT: │ ├─= := tok[2]
-// CHECK-NEXT: │ └─initializer-clause~braced-init-list
-// CHECK-NEXT: │ ├─{ := tok[3]
-// CHECK-NEXT: │ ├─initializer-list := <opaque>
-// CHECK-NEXT: │ └─} := tok[6]
-// CHECK-NEXT: └─; := tok[7]
diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
index a37fda3fe2f5..846f2dbf1677 100644
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "clang-pseudo/GLR.h"
-#include "clang-pseudo/Bracket.h"
#include "clang-pseudo/Token.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang/Basic/LangOptions.h"
@@ -33,13 +32,11 @@ namespace {
using Action = LRTable::Action;
using testing::AllOf;
using testing::ElementsAre;
-using testing::IsEmpty;
using testing::UnorderedElementsAre;
MATCHER_P(state, StateID, "") { return arg->State == StateID; }
MATCHER_P(parsedSymbol, FNode, "") { return arg->Payload == FNode; }
MATCHER_P(parsedSymbolID, SID, "") { return arg->Payload->symbol() == SID; }
-MATCHER_P(start, Start, "") { return arg->Payload->startTokenIndex() == Start; }
testing::Matcher<const GSS::Node *>
parents(llvm::ArrayRef<const GSS::Node *> Parents) {
@@ -241,9 +238,9 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
/*State=*/1, /*ForestNode=*/CVQualifierNode, /*Parents=*/{GSSNode0});
const auto *GSSNode2 = GSStack.addNode(
/*State=*/2, /*ForestNode=*/CVQualifierNode, /*Parents=*/{GSSNode0});
- const auto *GSSNode3 = GSStack.addNode(
- /*State=*/3, /*ForestNode=*/ClassNameNode,
- /*Parents=*/{GSSNode1});
+ const auto *GSSNode3 =
+ GSStack.addNode(/*State=*/3, /*ForestNode=*/ClassNameNode,
+ /*Parents=*/{GSSNode1});
const auto *GSSNode4 =
GSStack.addNode(/*State=*/4, /*ForestNode=*/EnumNameNode,
/*Parents=*/{GSSNode2});
@@ -366,124 +363,6 @@ TEST_F(GLRTest, ReduceLookahead) {
EXPECT_THAT(Heads, ElementsAre(GSSNode1));
}
-TEST_F(GLRTest, Recover) {
- // Recovery while parsing "word" inside braces.
- // Before:
- // 0--1({)--2(?)
- // After recovering a `word` at state 1:
- // 0--3(word) // 3 is goto(1, word)
- buildGrammar({"word"}, {});
- LRTable Table = LRTable::buildForTests(
- G, {{/*State=*/1, id("word"), Action::goTo(3)}}, /*Reduce=*/{},
- /*Recovery=*/{{/*State=*/1, RecoveryStrategy::Braces, id("word")}});
-
- auto *LBrace = &Arena.createTerminal(tok::l_brace, 0);
- auto *Question1 = &Arena.createTerminal(tok::question, 1);
- const auto *Root = GSStack.addNode(0, nullptr, {});
- const auto *OpenedBraces = GSStack.addNode(1, LBrace, {Root});
- const auto *AfterQuestion1 = GSStack.addNode(2, Question1, {OpenedBraces});
-
- // Need a token stream with paired braces so the strategy works.
- clang::LangOptions LOptions;
- TokenStream Tokens = cook(lex("{ ? ? ? }", LOptions), LOptions);
- pairBrackets(Tokens);
- std::vector<const GSS::Node *> NewHeads;
-
- unsigned TokenIndex = 2;
- glrRecover({AfterQuestion1}, TokenIndex, Tokens, {G, Table, Arena, GSStack},
- NewHeads);
- EXPECT_EQ(TokenIndex, 4u) << "should skip ahead to matching brace";
- EXPECT_THAT(NewHeads, ElementsAre(
- AllOf(state(3), parsedSymbolID(id("word")),
- parents({OpenedBraces}), start(1u))));
- EXPECT_EQ(NewHeads.front()->Payload->kind(), ForestNode::Opaque);
-
- // Test recovery failure: omit closing brace so strategy fails
- TokenStream NoRBrace = cook(lex("{ ? ? ? ?", LOptions), LOptions);
- pairBrackets(NoRBrace);
- NewHeads.clear();
- TokenIndex = 2;
- glrRecover({AfterQuestion1}, TokenIndex, NoRBrace,
- {G, Table, Arena, GSStack}, NewHeads);
- EXPECT_EQ(TokenIndex, 3u) << "should advance by 1 by default";
- EXPECT_THAT(NewHeads, IsEmpty());
-}
-
-TEST_F(GLRTest, RecoverRightmost) {
- // In a nested block structure, we recover at the innermost possible block.
- // Before:
- // 0--1({)--1({)--1({)
- // After recovering a `block` inside the second braces:
- // 0--1({)--2(body) // 2 is goto(1, body)
- buildGrammar({"body"}, {});
- LRTable Table = LRTable::buildForTests(
- G, {{/*State=*/1, id("body"), Action::goTo(2)}}, /*Reduce=*/{},
- /*Recovery=*/{{/*State=*/1, RecoveryStrategy::Braces, id("body")}});
-
- clang::LangOptions LOptions;
- // Innermost brace is unmatched, to test fallback to next brace.
- TokenStream Tokens = cook(lex("{ { { ? ? } }", LOptions), LOptions);
- Tokens.tokens()[0].Pair = 5;
- Tokens.tokens()[1].Pair = 4;
- Tokens.tokens()[4].Pair = 1;
- Tokens.tokens()[5].Pair = 0;
-
- auto *Brace1 = &Arena.createTerminal(tok::l_brace, 0);
- auto *Brace2 = &Arena.createTerminal(tok::l_brace, 1);
- auto *Brace3 = &Arena.createTerminal(tok::l_brace, 2);
- const auto *Root = GSStack.addNode(0, nullptr, {});
- const auto *In1 = GSStack.addNode(1, Brace1, {Root});
- const auto *In2 = GSStack.addNode(1, Brace2, {In1});
- const auto *In3 = GSStack.addNode(1, Brace3, {In2});
-
- unsigned TokenIndex = 3;
- std::vector<const GSS::Node *> NewHeads;
- glrRecover({In3}, TokenIndex, Tokens, {G, Table, Arena, GSStack}, NewHeads);
- EXPECT_EQ(TokenIndex, 5u);
- EXPECT_THAT(NewHeads, ElementsAre(AllOf(state(2), parsedSymbolID(id("body")),
- parents({In2}), start(2u))));
-}
-
-TEST_F(GLRTest, RecoverAlternatives) {
- // Recovery inside braces with multiple equally good options
- // Before:
- // 0--1({)
- // After recovering either `word` or `number` inside the braces:
- // 0--1({)--2(word) // 2 is goto(1, word)
- // └--3(number) // 3 is goto(1, number)
- buildGrammar({"number", "word"}, {});
- LRTable Table = LRTable::buildForTests(
- G,
- {
- {/*State=*/1, id("number"), Action::goTo(2)},
- {/*State=*/1, id("word"), Action::goTo(3)},
- },
- /*Reduce=*/{},
- /*Recovery=*/
- {
- {/*State=*/1, RecoveryStrategy::Braces, id("number")},
- {/*State=*/1, RecoveryStrategy::Braces, id("word")},
- });
- auto *LBrace = &Arena.createTerminal(tok::l_brace, 0);
- const auto *Root = GSStack.addNode(0, nullptr, {});
- const auto *OpenedBraces = GSStack.addNode(1, LBrace, {Root});
-
- clang::LangOptions LOptions;
- TokenStream Tokens = cook(lex("{ ? }", LOptions), LOptions);
- pairBrackets(Tokens);
- std::vector<const GSS::Node *> NewHeads;
- unsigned TokenIndex = 1;
-
- glrRecover({OpenedBraces}, TokenIndex, Tokens, {G, Table, Arena, GSStack},
- NewHeads);
- EXPECT_EQ(TokenIndex, 2u);
- EXPECT_THAT(NewHeads,
- UnorderedElementsAre(AllOf(state(2), parsedSymbolID(id("number")),
- parents({OpenedBraces}), start(1u)),
- AllOf(state(3), parsedSymbolID(id("word")),
- parents({OpenedBraces}), start(1u))));
-}
-
TEST_F(GLRTest, PerfectForestNodeSharing) {
// Run the GLR on a simple grammar and test that we build exactly one forest
// node per (SymbolID, token range).
@@ -552,40 +431,6 @@ TEST_F(GLRTest, GLRReduceOrder) {
"[ 0, end) └─IDENTIFIER := tok[0]\n");
}
-TEST_F(GLRTest, RecoveryEndToEnd) {
- // Simple example of brace-based recovery showing:
- // - recovered region includes tokens both ahead of and behind the cursor
- // - multiple possible recovery rules
- // - recovery from outer scopes is rejected
- build(R"bnf(
- _ := block
-
- block := { block }
- block := { numbers }
- numbers := NUMERIC_CONSTANT NUMERIC_CONSTANT
- )bnf");
- auto LRTable = LRTable::buildSLR(G);
- clang::LangOptions LOptions;
- TokenStream Tokens = cook(lex("{ { 42 ? } }", LOptions), LOptions);
- pairBrackets(Tokens);
-
- const ForestNode &Parsed =
- glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("block"));
- EXPECT_EQ(Parsed.dumpRecursive(G),
- "[ 0, end) block := { block [recover=1] }\n"
- "[ 0, 1) ├─{ := tok[0]\n"
- "[ 1, 5) ├─block := <ambiguous>\n"
- "[ 1, 5) │ ├─block := { block [recover=1] }\n"
- "[ 1, 2) │ │ ├─{ := tok[1]\n"
- "[ 2, 4) │ │ ├─block := <opaque>\n"
- "[ 4, 5) │ │ └─} := tok[4]\n"
- "[ 1, 5) │ └─block := { numbers [recover=1] }\n"
- "[ 1, 2) │ ├─{ := tok[1]\n"
- "[ 2, 4) │ ├─numbers := <opaque>\n"
- "[ 4, 5) │ └─} := tok[4]\n"
- "[ 5, end) └─} := tok[5]\n");
-}
-
TEST_F(GLRTest, NoExplicitAccept) {
build(R"bnf(
_ := test