[clang-tools-extra] 9ab67cc - [pseudo] Implement guard extension.
Haojian Wu via cfe-commits
cfe-commits at lists.llvm.org
Tue Jul 5 06:56:31 PDT 2022
Author: Haojian Wu
Date: 2022-07-05T15:55:15+02:00
New Revision: 9ab67cc8bfe7181b7d775bab4d7a908dc8774889
URL: https://github.com/llvm/llvm-project/commit/9ab67cc8bfe7181b7d775bab4d7a908dc8774889
DIFF: https://github.com/llvm/llvm-project/commit/9ab67cc8bfe7181b7d775bab4d7a908dc8774889.diff
LOG: [pseudo] Implement guard extension.
- Extend the GLR parser to allow conditional reduction based on the
guard functions;
- Implement two simple guards (contextual-override/final) for cxx.bnf;
- layering: clangPseudoCXX depends on clangPseudo (as the guard functions need
to access the TokenStream);
Differential Revision: https://reviews.llvm.org/D127448
Added:
clang-tools-extra/pseudo/test/cxx/contextual-keywords.cpp
Modified:
clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
clang-tools-extra/pseudo/gen/Main.cpp
clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
clang-tools-extra/pseudo/include/clang-pseudo/Language.h
clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
clang-tools-extra/pseudo/lib/GLR.cpp
clang-tools-extra/pseudo/lib/cli/CLI.cpp
clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
clang-tools-extra/pseudo/lib/cxx/CXX.cpp
clang-tools-extra/pseudo/lib/cxx/cxx.bnf
clang-tools-extra/pseudo/tool/ClangPseudo.cpp
clang-tools-extra/pseudo/unittests/GLRTest.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
index 02f4a40fae7c7..087ab6c250e39 100644
--- a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
+++ b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
@@ -122,8 +122,7 @@ static void glrParse(benchmark::State &State) {
for (auto _ : State) {
pseudo::ForestArena Forest;
pseudo::GSS GSS;
- pseudo::glrParse(Stream, ParseParams{Lang->G, Lang->Table, Forest, GSS},
- StartSymbol);
+ pseudo::glrParse(ParseParams{Stream, Forest, GSS}, StartSymbol, *Lang);
}
State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
SourceText->size());
@@ -136,9 +135,7 @@ static void full(benchmark::State &State) {
TokenStream Stream = lexAndPreprocess();
pseudo::ForestArena Forest;
pseudo::GSS GSS;
- pseudo::glrParse(lexAndPreprocess(),
- ParseParams{Lang->G, Lang->Table, Forest, GSS},
- StartSymbol);
+ pseudo::glrParse(ParseParams{Stream, Forest, GSS}, StartSymbol, *Lang);
}
State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
SourceText->size());
diff --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
index ea1606b330be3..87b9d15480cc3 100644
--- a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
+++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
@@ -43,9 +43,8 @@ class Fuzzer {
clang::pseudo::GSS GSS;
const Language &Lang = getLanguageFromFlags();
auto &Root =
- glrParse(ParseableStream,
- clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
- *Lang.G.findNonterminal("translation-unit"));
+ glrParse(clang::pseudo::ParseParams{ParseableStream, Arena, GSS},
+ *Lang.G.findNonterminal("translation-unit"), Lang);
if (Print)
llvm::outs() << Root.dumpRecursive(Lang.G);
}
diff --git a/clang-tools-extra/pseudo/gen/Main.cpp b/clang-tools-extra/pseudo/gen/Main.cpp
index 5964cc8143f05..b74f3d46c3560 100644
--- a/clang-tools-extra/pseudo/gen/Main.cpp
+++ b/clang-tools-extra/pseudo/gen/Main.cpp
@@ -79,6 +79,14 @@ int main(int argc, char *argv[]) {
switch (Emit) {
case EmitSymbolList:
+ Out.os() << R"cpp(
+#ifndef NONTERMINAL
+#define NONTERMINAL(X, Y)
+#endif
+#ifndef EXTENSION
+#define EXTENSION(X, Y)
+#endif
+ )cpp";
for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size();
++ID) {
std::string Name = G.symbolName(ID).str();
@@ -86,6 +94,16 @@ int main(int argc, char *argv[]) {
std::replace(Name.begin(), Name.end(), '-', '_');
Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID);
}
+ for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/;
+ EID < G.table().AttributeValues.size(); ++EID) {
+ llvm::StringRef Name = G.table().AttributeValues[EID];
+ assert(!Name.empty());
+ Out.os() << llvm::formatv("EXTENSION({0}, {1})\n", Name, EID);
+ }
+ Out.os() << R"cpp(
+#undef NONTERMINAL
+#undef EXTENSION
+ )cpp";
break;
case EmitGrammarContent:
for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) {
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
index 51fb233bf9962..ebf54685b01b2 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
@@ -30,6 +30,7 @@
#define CLANG_PSEUDO_GLR_H
#include "clang-pseudo/Forest.h"
+#include "clang-pseudo/Language.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRTable.h"
#include "llvm/Support/Allocator.h"
@@ -112,38 +113,35 @@ struct GSS {
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const GSS::Node &);
// Parameters for the GLR parsing.
-// FIXME: refine it with the ParseLang struct.
struct ParseParams {
- // The grammar of the language we're going to parse.
- const Grammar &G;
- // The LR table which GLR uses to parse the input, should correspond to the
- // Grammar G.
- const LRTable &Table;
+ // The token stream to parse.
+ const TokenStream &Code;
// Arena for data structure used by the GLR algorithm.
ForestArena &Forest; // Storage for the output forest.
GSS &GSStack; // Storage for parsing stacks.
};
+
// Parses the given token stream as the start symbol with the GLR algorithm,
// and returns a forest node of the start symbol.
//
// A rule `_ := StartSymbol` must exist for the chosen start symbol.
//
// If the parsing fails, we model it as an opaque node in the forest.
-const ForestNode &glrParse(const TokenStream &Code, const ParseParams &Params,
- SymbolID StartSymbol);
+const ForestNode &glrParse(const ParseParams &Params, SymbolID StartSymbol,
+ const Language &Lang);
// Shift a token onto all OldHeads, placing the results into NewHeads.
//
// Exposed for testing only.
void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads,
const ForestNode &NextTok, const ParseParams &Params,
- std::vector<const GSS::Node *> &NewHeads);
+ const Language &Lang, std::vector<const GSS::Node *> &NewHeads);
// Applies available reductions on Heads, appending resulting heads to the list.
//
// Exposed for testing only.
void glrReduce(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead,
- const ParseParams &Params);
+ const ParseParams &Params, const Language &Lang);
} // namespace pseudo
} // namespace clang
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
index 60fb28a2b1664..8c2fdfaa2852c 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -14,12 +14,28 @@
namespace clang {
namespace pseudo {
+class ForestNode;
+class TokenStream;
+class LRTable;
+
+// A guard restricts when a grammar rule can be used.
+//
+// The GLR parser will use the guard to determine whether a rule reduction will
+// be conducted. For example, a guard may allow the rule
+// `virt-specifier := IDENTIFIER` only if the identifier's text is `override`.
+//
+// Return true if the guard is satisfied.
+using RuleGuard = llvm::function_ref<bool(
+ llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>;
// Specify a language that can be parsed by the pseudoparser.
struct Language {
Grammar G;
LRTable Table;
+ // Binding "guard" extension id to a piece of C++ code.
+ llvm::DenseMap<ExtensionID, RuleGuard> Guards;
+
// FIXME: add clang::LangOptions.
// FIXME: add default start symbols.
};
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
index 066cfbb818abf..d2509927337ed 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
@@ -37,6 +37,12 @@ enum class Symbol : SymbolID {
#undef NONTERMINAL
};
+enum class Extension : ExtensionID {
+#define EXTENSION(X, Y) X = Y,
+#include "CXXSymbols.inc"
+#undef EXTENSION
+};
+
// Returns the Language for the cxx.bnf grammar.
const Language &getLanguage();
diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp
index 6373024b3db65..7ef09bf419e9e 100644
--- a/clang-tools-extra/pseudo/lib/GLR.cpp
+++ b/clang-tools-extra/pseudo/lib/GLR.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "clang-pseudo/GLR.h"
+#include "clang-pseudo/Language.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRTable.h"
#include "clang/Basic/TokenKinds.h"
@@ -51,16 +52,16 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GSS::Node &N) {
// └---3---┘
void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads,
const ForestNode &NewTok, const ParseParams &Params,
- std::vector<const GSS::Node *> &NewHeads) {
+ const Language &Lang, std::vector<const GSS::Node *> &NewHeads) {
assert(NewTok.kind() == ForestNode::Terminal);
LLVM_DEBUG(llvm::dbgs() << llvm::formatv(" Shift {0} ({1} active heads):\n",
- Params.G.symbolName(NewTok.symbol()),
+ Lang.G.symbolName(NewTok.symbol()),
OldHeads.size()));
// We group pending shifts by their target state so we can merge them.
llvm::SmallVector<std::pair<StateID, const GSS::Node *>, 8> Shifts;
for (const auto *H : OldHeads)
- if (auto S = Params.Table.getShiftState(H->State, NewTok.symbol()))
+ if (auto S = Lang.Table.getShiftState(H->State, NewTok.symbol()))
Shifts.push_back({*S, H});
llvm::stable_sort(Shifts, llvm::less_first{});
@@ -144,7 +145,7 @@ template <typename T> void sortAndUnique(std::vector<T> &Vec) {
// storage across calls).
class GLRReduce {
const ParseParams &Params;
-
+ const Language& Lang;
// There are two interacting complications:
// 1. Performing one reduce can unlock new reduces on the newly-created head.
// 2a. The ambiguous ForestNodes must be complete (have all sequence nodes).
@@ -230,7 +231,8 @@ class GLRReduce {
Sequence TempSequence;
public:
- GLRReduce(const ParseParams &Params) : Params(Params) {}
+ GLRReduce(const ParseParams &Params, const Language &Lang)
+ : Params(Params), Lang(Lang) {}
void operator()(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead) {
assert(isToken(Lookahead));
@@ -249,10 +251,21 @@ class GLRReduce {
}
private:
+ bool canReduce(ExtensionID GuardID, RuleID RID,
+ llvm::ArrayRef<const ForestNode *> RHS) const {
+ if (!GuardID)
+ return true;
+ if (auto Guard = Lang.Guards.lookup(GuardID))
+ return Guard(RHS, Params.Code);
+ LLVM_DEBUG(llvm::dbgs()
+ << llvm::formatv("missing guard implementation for rule {0}\n",
+ Lang.G.dumpRule(RID)));
+ return true;
+ }
// pop walks up the parent chain(s) for a reduction from Head by Rule.
// Once we reach the end, record the bases and sequences.
void pop(const GSS::Node *Head, RuleID RID, const Rule &Rule) {
- LLVM_DEBUG(llvm::dbgs() << " Pop " << Params.G.dumpRule(RID) << "\n");
+ LLVM_DEBUG(llvm::dbgs() << " Pop " << Lang.G.dumpRule(RID) << "\n");
Family F{/*Start=*/0, /*Symbol=*/Rule.Target, /*Rule=*/RID};
TempSequence.resize_for_overwrite(Rule.Size);
auto DFS = [&](const GSS::Node *N, unsigned I, auto &DFS) {
@@ -263,7 +276,8 @@ class GLRReduce {
for (const auto *B : N->parents())
llvm::dbgs() << " --> base at S" << B->State << "\n";
});
-
+ if (!canReduce(Rule.Guard, RID, TempSequence))
+ return;
// Copy the chain to stable storage so it can be enqueued.
if (SequenceStorageCount == SequenceStorage.size())
SequenceStorage.emplace_back();
@@ -286,9 +300,9 @@ class GLRReduce {
if (popAndPushTrivial())
continue;
for (RuleID RID :
- Params.Table.getReduceRules((*Heads)[NextPopHead]->State)) {
- const auto &Rule = Params.G.lookupRule(RID);
- if (Params.Table.canFollow(Rule.Target, Lookahead))
+ Lang.Table.getReduceRules((*Heads)[NextPopHead]->State)) {
+ const auto &Rule = Lang.G.lookupRule(RID);
+ if (Lang.Table.canFollow(Rule.Target, Lookahead))
pop((*Heads)[NextPopHead], RID, Rule);
}
}
@@ -306,7 +320,7 @@ class GLRReduce {
assert(!Sequences.empty());
Family F = Sequences.top().first;
- LLVM_DEBUG(llvm::dbgs() << " Push " << Params.G.symbolName(F.Symbol)
+ LLVM_DEBUG(llvm::dbgs() << " Push " << Lang.G.symbolName(F.Symbol)
<< " from token " << F.Start << "\n");
// Grab the sequences and bases for this family.
@@ -319,7 +333,7 @@ class GLRReduce {
const PushSpec &Push = Sequences.top().second;
FamilySequences.emplace_back(Sequences.top().first.Rule, *Push.Seq);
for (const GSS::Node *Base : Push.LastPop->parents()) {
- auto NextState = Params.Table.getGoToState(Base->State, F.Symbol);
+ auto NextState = Lang.Table.getGoToState(Base->State, F.Symbol);
assert(NextState.hasValue() && "goto must succeed after reduce!");
FamilyBases.emplace_back(*NextState, Base);
}
@@ -337,7 +351,7 @@ class GLRReduce {
SequenceNodes.size() == 1
? SequenceNodes.front()
: &Params.Forest.createAmbiguous(F.Symbol, SequenceNodes);
- LLVM_DEBUG(llvm::dbgs() << " --> " << Parsed->dump(Params.G) << "\n");
+ LLVM_DEBUG(llvm::dbgs() << " --> " << Parsed->dump(Lang.G) << "\n");
// Bases for this family, deduplicate them, and group by the goTo State.
sortAndUnique(FamilyBases);
@@ -375,15 +389,15 @@ class GLRReduce {
return false;
const GSS::Node *Head = Heads->back();
llvm::Optional<RuleID> RID;
- for (RuleID R : Params.Table.getReduceRules(Head->State)) {
+ for (RuleID R : Lang.Table.getReduceRules(Head->State)) {
if (RID.hasValue())
return false;
RID = R;
}
if (!RID)
return true; // no reductions available, but we've processed the head!
- const auto &Rule = Params.G.lookupRule(*RID);
- if (!Params.Table.canFollow(Rule.Target, Lookahead))
+ const auto &Rule = Lang.G.lookupRule(*RID);
+ if (!Lang.Table.canFollow(Rule.Target, Lookahead))
return true; // reduction is not available
const GSS::Node *Base = Head;
TempSequence.resize_for_overwrite(Rule.Size);
@@ -393,9 +407,11 @@ class GLRReduce {
TempSequence[Rule.Size - 1 - I] = Base->Payload;
Base = Base->parents().front();
}
+ if (!canReduce(Rule.Guard, *RID, TempSequence))
+ return true; // reduction is not available
const ForestNode *Parsed =
&Params.Forest.createSequence(Rule.Target, *RID, TempSequence);
- auto NextState = Params.Table.getGoToState(Base->State, Rule.Target);
+ auto NextState = Lang.Table.getGoToState(Base->State, Rule.Target);
assert(NextState.hasValue() && "goto must succeed after reduce!");
Heads->push_back(Params.GSStack.addNode(*NextState, Parsed, {Base}));
return true;
@@ -404,16 +420,14 @@ class GLRReduce {
} // namespace
-const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
- SymbolID StartSymbol) {
- GLRReduce Reduce(Params);
+const ForestNode &glrParse( const ParseParams &Params, SymbolID StartSymbol,
+ const Language& Lang) {
+ GLRReduce Reduce(Params, Lang);
assert(isNonterminal(StartSymbol) && "Start symbol must be a nonterminal");
- llvm::ArrayRef<ForestNode> Terminals = Params.Forest.createTerminals(Tokens);
- auto &G = Params.G;
- (void)G;
+ llvm::ArrayRef<ForestNode> Terminals = Params.Forest.createTerminals(Params.Code);
auto &GSS = Params.GSStack;
- StateID StartState = Params.Table.getStartState(StartSymbol);
+ StateID StartState = Lang.Table.getStartState(StartSymbol);
// Heads correspond to the parse of tokens [0, I), NextHeads to [0, I+1).
std::vector<const GSS::Node *> Heads = {GSS.addNode(/*State=*/StartState,
/*ForestNode=*/nullptr,
@@ -433,9 +447,9 @@ const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
for (unsigned I = 0; I < Terminals.size(); ++I) {
LLVM_DEBUG(llvm::dbgs() << llvm::formatv(
"Next token {0} (id={1})\n",
- G.symbolName(Terminals[I].symbol()), Terminals[I].symbol()));
+ Lang.G.symbolName(Terminals[I].symbol()), Terminals[I].symbol()));
// Consume the token.
- glrShift(Heads, Terminals[I], Params, NextHeads);
+ glrShift(Heads, Terminals[I], Params, Lang, NextHeads);
// Form nonterminals containing the token we just consumed.
SymbolID Lookahead = I + 1 == Terminals.size() ? tokenSymbol(tok::eof)
: Terminals[I + 1].symbol();
@@ -447,7 +461,7 @@ const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
}
LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Reached eof\n"));
- auto AcceptState = Params.Table.getGoToState(StartState, StartSymbol);
+ auto AcceptState = Lang.Table.getGoToState(StartState, StartSymbol);
assert(AcceptState.hasValue() && "goto must succeed after start symbol!");
const ForestNode *Result = nullptr;
for (const auto *Head : Heads) {
@@ -468,9 +482,9 @@ const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
}
void glrReduce(std::vector<const GSS::Node *> &Heads, SymbolID Lookahead,
- const ParseParams &Params) {
+ const ParseParams &Params, const Language &Lang) {
// Create a new GLRReduce each time for tests, performance doesn't matter.
- GLRReduce{Params}(Heads, Lookahead);
+ GLRReduce{Params, Lang}(Heads, Lookahead);
}
const GSS::Node *GSS::addNode(LRTable::StateID State, const ForestNode *Symbol,
diff --git a/clang-tools-extra/pseudo/lib/cli/CLI.cpp b/clang-tools-extra/pseudo/lib/cli/CLI.cpp
index 267091448a9d3..cbc4b04545317 100644
--- a/clang-tools-extra/pseudo/lib/cli/CLI.cpp
+++ b/clang-tools-extra/pseudo/lib/cli/CLI.cpp
@@ -8,6 +8,7 @@
#include "clang-pseudo/cli/CLI.h"
#include "clang-pseudo/cxx/CXX.h"
+#include "clang-pseudo/grammar/Grammar.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -39,7 +40,8 @@ const Language &getLanguageFromFlags() {
for (const auto &Diag : Diags)
llvm::errs() << Diag << "\n";
auto Table = LRTable::buildSLR(G);
- return new Language{std::move(G), std::move(Table)};
+ return new Language{std::move(G), std::move(Table),
+ llvm::DenseMap<ExtensionID, RuleGuard>()};
}();
return *Lang;
}
diff --git a/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt b/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
index 775b3b8c95384..a72c94739cf80 100644
--- a/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/lib/cxx/CMakeLists.txt
@@ -9,5 +9,6 @@ add_clang_library(clangPseudoCXX
cxx_gen
LINK_LIBS
+ clangPseudo
clangPseudoGrammar
)
diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
index 3c60b546cf62e..7fb00f03752fc 100644
--- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "clang-pseudo/cxx/CXX.h"
+#include "clang-pseudo/Forest.h"
#include "clang-pseudo/Language.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRTable.h"
@@ -19,6 +20,26 @@ namespace {
static const char *CXXBNF =
#include "CXXBNF.inc"
;
+
+bool guardOverride(llvm::ArrayRef<const ForestNode *> RHS,
+ const TokenStream &Tokens) {
+ assert(RHS.size() == 1 &&
+ RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
+ return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "override";
+}
+bool guardFinal(llvm::ArrayRef<const ForestNode *> RHS,
+ const TokenStream &Tokens) {
+ assert(RHS.size() == 1 &&
+ RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
+ return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "final";
+}
+
+llvm::DenseMap<ExtensionID, RuleGuard> buildGuards() {
+ return llvm::DenseMap<ExtensionID, RuleGuard>(
+ {{(ExtensionID)Extension::Override, guardOverride},
+ {(ExtensionID)Extension::Final, guardFinal}});
+}
+
} // namespace
const Language &getLanguage() {
@@ -27,10 +48,8 @@ const Language &getLanguage() {
auto G = Grammar::parseBNF(CXXBNF, Diags);
assert(Diags.empty());
LRTable Table = LRTable::buildSLR(G);
- const Language *PL = new Language{
- std::move(G),
- std::move(Table),
- };
+ const Language *PL =
+ new Language{std::move(G), std::move(Table), buildGuards()};
return *PL;
}();
return CXXLanguage;
diff --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
index 03a7d00182e9a..3938fa9dcaf57 100644
--- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -744,8 +744,8 @@ pointer-literal := NULLPTR
#! Contextual keywords -- clang lexer always lexes them as identifier tokens.
#! Placeholders for literal text in the grammar that lex as other things.
-contextual-override := IDENTIFIER
-contextual-final := IDENTIFIER
+contextual-override := IDENTIFIER [guard=Override]
+contextual-final := IDENTIFIER [guard=Final]
contextual-zero := NUMERIC_CONSTANT
module-keyword := IDENTIFIER
import-keyword := IDENTIFIER
diff --git a/clang-tools-extra/pseudo/test/cxx/contextual-keywords.cpp b/clang-tools-extra/pseudo/test/cxx/contextual-keywords.cpp
new file mode 100644
index 0000000000000..ae74353c0a156
--- /dev/null
+++ b/clang-tools-extra/pseudo/test/cxx/contextual-keywords.cpp
@@ -0,0 +1,9 @@
+// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s
+// Verify that the contextual-{final,override} rules are guarded conditionally,
+// No ambiguous parsing for the virt-specifier.
+class Foo {
+ void foo1() override;
+// CHECK: virt-specifier-seq~IDENTIFIER := tok[7]
+ void foo2() final;
+// CHECK: virt-specifier-seq~IDENTIFIER := tok[13]
+};
diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
index 295b7dc1d0087..b50be02731e0f 100644
--- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -145,9 +145,8 @@ int main(int argc, char *argv[]) {
return 2;
}
auto &Root =
- glrParse(*ParseableStream,
- clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
- *StartSymID);
+ glrParse(clang::pseudo::ParseParams{*ParseableStream, Arena, GSS},
+ *StartSymID, Lang);
if (PrintForest)
llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
index 8dc0939b9bd9e..15ed5bb24274a 100644
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -81,6 +81,14 @@ class GLRTest : public ::testing::Test {
ADD_FAILURE() << "No such symbol found: " << Name;
return 0;
}
+ ExtensionID extensionID(llvm::StringRef AttrValueName) const {
+ for (ExtensionID EID = 0; EID < TestLang.G.table().AttributeValues.size();
+ ++EID)
+ if (TestLang.G.table().AttributeValues[EID] == AttrValueName)
+ return EID;
+ ADD_FAILURE() << "No such attribute value found: " << AttrValueName;
+ return 0;
+ }
RuleID ruleFor(llvm::StringRef NonterminalName) const {
auto RuleRange =
@@ -131,7 +139,7 @@ TEST_F(GLRTest, ShiftMergingHeads) {
ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);
std::vector<const GSS::Node *> NewHeads;
glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal,
- {TestLang.G, TestLang.Table, Arena, GSStack}, NewHeads);
+ {emptyTokenStream(), Arena, GSStack}, TestLang, NewHeads);
EXPECT_THAT(NewHeads,
UnorderedElementsAre(AllOf(state(4), parsedSymbol(&SemiTerminal),
@@ -164,7 +172,7 @@ TEST_F(GLRTest, ReduceConflictsSplitting) {
std::vector<const GSS::Node *> Heads = {GSSNode1};
glrReduce(Heads, tokenSymbol(tok::eof),
- {TestLang.G, TestLang.Table, Arena, GSStack});
+ {emptyTokenStream(), Arena, GSStack}, TestLang);
EXPECT_THAT(Heads, UnorderedElementsAre(
GSSNode1,
AllOf(state(2), parsedSymbolID(id("class-name")),
@@ -202,7 +210,8 @@ TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) {
TestLang.Table = std::move(B).build();
std::vector<const GSS::Node *> Heads = {GSSNode4};
- glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});
+ glrReduce(Heads, tokenSymbol(tok::eof), {emptyTokenStream(), Arena, GSStack},
+ TestLang);
EXPECT_THAT(Heads, UnorderedElementsAre(
GSSNode4,
@@ -254,7 +263,8 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
TestLang.Table = std::move(B).build();
std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
- glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});
+ glrReduce(Heads, tokenSymbol(tok::eof), {emptyTokenStream(), Arena, GSStack},
+ TestLang);
// Verify that the stack heads are joint at state 5 after reduces.
EXPECT_THAT(Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
@@ -309,7 +319,7 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
glrReduce(Heads, tokenSymbol(tok::eof),
- {TestLang.G, TestLang.Table, Arena, GSStack});
+ {emptyTokenStream(), Arena, GSStack}, TestLang);
EXPECT_THAT(
Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
@@ -343,14 +353,16 @@ TEST_F(GLRTest, ReduceLookahead) {
// When the lookahead is +, reduce is performed.
std::vector<const GSS::Node *> Heads = {GSSNode1};
- glrReduce(Heads, tokenSymbol(tok::plus), {TestLang.G, TestLang.Table, Arena, GSStack});
+ glrReduce(Heads, tokenSymbol(tok::plus), {emptyTokenStream(), Arena, GSStack},
+ TestLang);
EXPECT_THAT(Heads,
ElementsAre(GSSNode1, AllOf(state(2), parsedSymbolID(id("term")),
parents(Root))));
// When the lookahead is -, reduce is not performed.
Heads = {GSSNode1};
- glrReduce(Heads, tokenSymbol(tok::minus), {TestLang.G, TestLang.Table, Arena, GSStack});
+ glrReduce(Heads, tokenSymbol(tok::minus),
+ {emptyTokenStream(), Arena, GSStack}, TestLang);
EXPECT_THAT(Heads, ElementsAre(GSSNode1));
}
@@ -376,7 +388,7 @@ TEST_F(GLRTest, PerfectForestNodeSharing) {
const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions);
const ForestNode &Parsed =
- glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
+ glrParse({Tokens, Arena, GSStack}, id("test"), TestLang);
// Verify that there is no duplicated sequence node of `expr := IDENTIFIER`
// in the forest, see the `#1` and `=#1` in the dump string.
EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
@@ -413,7 +425,7 @@ TEST_F(GLRTest, GLRReduceOrder) {
TestLang.Table = LRTable::buildSLR(TestLang.G);
const ForestNode &Parsed =
- glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
+ glrParse({Tokens, Arena, GSStack}, id("test"), TestLang);
EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
"[ 0, end) test := <ambiguous>\n"
"[ 0, end) ├─test := IDENTIFIER\n"
@@ -438,7 +450,7 @@ TEST_F(GLRTest, NoExplicitAccept) {
TestLang.Table = LRTable::buildSLR(TestLang.G);
const ForestNode &Parsed =
- glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
+ glrParse({Tokens, Arena, GSStack}, id("test"), TestLang);
EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
"[ 0, end) test := IDENTIFIER test\n"
"[ 0, 1) ├─IDENTIFIER := tok[0]\n"
@@ -446,6 +458,36 @@ TEST_F(GLRTest, NoExplicitAccept) {
"[ 1, end) └─IDENTIFIER := tok[1]\n");
}
+TEST_F(GLRTest, GuardExtension) {
+ build(R"bnf(
+ _ := start
+
+ start := IDENTIFIER [guard=TestOnly]
+ )bnf");
+ TestLang.Guards.try_emplace(
+ extensionID("TestOnly"),
+ [&](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {
+ assert(RHS.size() == 1 &&
+ RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
+ return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "test";
+ });
+ clang::LangOptions LOptions;
+ TestLang.Table = LRTable::buildSLR(TestLang.G);
+
+ std::string Input = "test";
+ const TokenStream &Succeeded = cook(lex(Input, LOptions), LOptions);
+ EXPECT_EQ(glrParse({Succeeded, Arena, GSStack}, id("start"), TestLang)
+ .dumpRecursive(TestLang.G),
+ "[ 0, end) start := IDENTIFIER [guard=TestOnly]\n"
+ "[ 0, end) └─IDENTIFIER := tok[0]\n");
+
+ Input = "notest";
+ const TokenStream &Failed = cook(lex(Input, LOptions), LOptions);
+ EXPECT_EQ(glrParse({Failed, Arena, GSStack}, id("start"), TestLang)
+ .dumpRecursive(TestLang.G),
+ "[ 0, end) start := <opaque>\n");
+}
+
TEST(GSSTest, GC) {
// ┌-A-┬-AB
// ├-B-┘
More information about the cfe-commits
mailing list