[clang-tools-extra] fe66aeb - [pseudo] Define a clangPseudoCLI library.
Haojian Wu via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 30 23:31:44 PDT 2022
Author: Haojian Wu
Date: 2022-07-01T08:31:34+02:00
New Revision: fe66aebd755191fac66a73b50d94c29f60be9a88
URL: https://github.com/llvm/llvm-project/commit/fe66aebd755191fac66a73b50d94c29f60be9a88
DIFF: https://github.com/llvm/llvm-project/commit/fe66aebd755191fac66a73b50d94c29f60be9a88.diff
LOG: [pseudo] Define a clangPseudoCLI library.
- define a common data structure Language which is a compiled result of the
bnf grammar. It is defined in Language.h;
- create a clangPseudoCLI lib which defines a grammar commandline flag and
exposes a function to get the Language. It supports --grammar=cxx,
--grammar=/path/to/file.bnf;
- use the clangPseudoCLI in clang-pseudo, fuzzer, and benchmark tools (
simplify the code and use the prebuilt cxx grammar);
Split out from https://reviews.llvm.org/D127448.
Differential Revision: https://reviews.llvm.org/D128679
Added:
clang-tools-extra/pseudo/include/clang-pseudo/Language.h
clang-tools-extra/pseudo/include/clang-pseudo/ParseLang.h
clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h
clang-tools-extra/pseudo/lib/cli/CLI.cpp
clang-tools-extra/pseudo/lib/cli/CMakeLists.txt
Modified:
clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
clang-tools-extra/pseudo/lib/CMakeLists.txt
clang-tools-extra/pseudo/lib/cxx/CXX.cpp
clang-tools-extra/pseudo/tool/CMakeLists.txt
clang-tools-extra/pseudo/tool/ClangPseudo.cpp
clang-tools-extra/pseudo/unittests/GLRTest.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
index a06fde7d7149c..5bf6ff2c08f32 100644
--- a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
+++ b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
@@ -25,6 +25,7 @@
#include "clang-pseudo/Forest.h"
#include "clang-pseudo/GLR.h"
#include "clang-pseudo/Token.h"
+#include "clang-pseudo/cli/CLI.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRTable.h"
#include "clang/Basic/LangOptions.h"
@@ -39,9 +40,6 @@ using llvm::cl::desc;
using llvm::cl::opt;
using llvm::cl::Required;
-static opt<std::string> GrammarFile("grammar",
- desc("Parse and check a BNF grammar file."),
- Required);
static opt<std::string> Source("source", desc("Source file"), Required);
namespace clang {
@@ -49,11 +47,10 @@ namespace pseudo {
namespace bench {
namespace {
-const std::string *GrammarText = nullptr;
const std::string *SourceText = nullptr;
-const Grammar *G = nullptr;
+const Language *Lang = nullptr;
-void setupGrammarAndSource() {
+void setup() {
auto ReadFile = [](llvm::StringRef FilePath) -> std::string {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
llvm::MemoryBuffer::getFile(FilePath);
@@ -64,22 +61,13 @@ void setupGrammarAndSource() {
}
return GrammarText.get()->getBuffer().str();
};
- GrammarText = new std::string(ReadFile(GrammarFile));
SourceText = new std::string(ReadFile(Source));
- std::vector<std::string> Diags;
- G = new Grammar(Grammar::parseBNF(*GrammarText, Diags));
+ Lang = &getLanguageFromFlags();
}
-static void parseBNF(benchmark::State &State) {
- std::vector<std::string> Diags;
- for (auto _ : State)
- Grammar::parseBNF(*GrammarText, Diags);
-}
-BENCHMARK(parseBNF);
-
static void buildSLR(benchmark::State &State) {
for (auto _ : State)
- LRTable::buildSLR(*G);
+ LRTable::buildSLR(Lang->G);
}
BENCHMARK(buildSLR);
@@ -129,13 +117,13 @@ static void preprocess(benchmark::State &State) {
BENCHMARK(preprocess);
static void glrParse(benchmark::State &State) {
- LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
- SymbolID StartSymbol = *G->findNonterminal("translation-unit");
+ SymbolID StartSymbol = *Lang->G.findNonterminal("translation-unit");
TokenStream Stream = lexAndPreprocess();
for (auto _ : State) {
pseudo::ForestArena Forest;
pseudo::GSS GSS;
- pseudo::glrParse(Stream, ParseParams{*G, Table, Forest, GSS}, StartSymbol);
+ pseudo::glrParse(Stream, ParseParams{Lang->G, Lang->Table, Forest, GSS},
+ StartSymbol);
}
State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
SourceText->size());
@@ -143,13 +131,13 @@ static void glrParse(benchmark::State &State) {
BENCHMARK(glrParse);
static void full(benchmark::State &State) {
- LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
- SymbolID StartSymbol = *G->findNonterminal("translation-unit");
+ SymbolID StartSymbol = *Lang->G.findNonterminal("translation-unit");
for (auto _ : State) {
TokenStream Stream = lexAndPreprocess();
pseudo::ForestArena Forest;
pseudo::GSS GSS;
- pseudo::glrParse(lexAndPreprocess(), ParseParams{*G, Table, Forest, GSS},
+ pseudo::glrParse(lexAndPreprocess(),
+ ParseParams{Lang->G, Lang->Table, Forest, GSS},
StartSymbol);
}
State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
@@ -165,7 +153,7 @@ BENCHMARK(full);
int main(int argc, char *argv[]) {
benchmark::Initialize(&argc, argv);
llvm::cl::ParseCommandLineOptions(argc, argv);
- clang::pseudo::bench::setupGrammarAndSource();
+ clang::pseudo::bench::setup();
benchmark::RunSpecifiedBenchmarks();
return 0;
}
diff --git a/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt b/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
index b088f8f7c68b8..859db991403cd 100644
--- a/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
@@ -3,6 +3,7 @@ add_benchmark(ClangPseudoBenchmark Benchmark.cpp)
target_link_libraries(ClangPseudoBenchmark
PRIVATE
clangPseudo
+ clangPseudoCLI
clangPseudoGrammar
LLVMSupport
)
diff --git a/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt b/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
index e5061cee3bf9d..e1d79873471f0 100644
--- a/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
@@ -11,5 +11,6 @@ add_llvm_fuzzer(clang-pseudo-fuzzer
target_link_libraries(clang-pseudo-fuzzer
PRIVATE
clangPseudo
+ clangPseudoCLI
clangPseudoGrammar
)
diff --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
index d56ccd018df0d..ea1606b330be3 100644
--- a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
+++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
@@ -10,6 +10,7 @@
#include "clang-pseudo/Forest.h"
#include "clang-pseudo/GLR.h"
#include "clang-pseudo/Token.h"
+#include "clang-pseudo/cli/CLI.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRTable.h"
#include "clang/Basic/LangOptions.h"
@@ -24,28 +25,10 @@ namespace {
class Fuzzer {
clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();
- Grammar G;
- LRTable T;
bool Print;
public:
- Fuzzer(llvm::StringRef GrammarPath, bool Print) : Print(Print) {
- llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
- llvm::MemoryBuffer::getFile(GrammarPath);
- if (std::error_code EC = GrammarText.getError()) {
- llvm::errs() << "Error: can't read grammar file '" << GrammarPath
- << "': " << EC.message() << "\n";
- std::exit(1);
- }
- std::vector<std::string> Diags;
- G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);
- if (!Diags.empty()) {
- for (const auto &Diag : Diags)
- llvm::errs() << Diag << "\n";
- std::exit(1);
- }
- T = LRTable::buildSLR(G);
- }
+ Fuzzer(bool Print) : Print(Print) {}
void operator()(llvm::StringRef Code) {
std::string CodeStr = Code.str(); // Must be null-terminated.
@@ -58,11 +41,13 @@ class Fuzzer {
clang::pseudo::ForestArena Arena;
clang::pseudo::GSS GSS;
+ const Language &Lang = getLanguageFromFlags();
auto &Root =
- glrParse(ParseableStream, clang::pseudo::ParseParams{G, T, Arena, GSS},
- *G.findNonterminal("translation-unit"));
+ glrParse(ParseableStream,
+ clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
+ *Lang.G.findNonterminal("translation-unit"));
if (Print)
- llvm::outs() << Root.dumpRecursive(G);
+ llvm::outs() << Root.dumpRecursive(Lang.G);
}
};
@@ -75,16 +60,11 @@ Fuzzer *Fuzz = nullptr;
extern "C" {
// Set up the fuzzer from command line flags:
-// -grammar=<file> (required) - path to cxx.bnf
// -print - used for testing the fuzzer
int LLVMFuzzerInitialize(int *Argc, char ***Argv) {
- llvm::StringRef GrammarFile;
bool PrintForest = false;
auto ConsumeArg = [&](llvm::StringRef Arg) -> bool {
- if (Arg.consume_front("-grammar=")) {
- GrammarFile = Arg;
- return true;
- } else if (Arg == "-print") {
+ if (Arg == "-print") {
PrintForest = true;
return true;
}
@@ -92,11 +72,7 @@ int LLVMFuzzerInitialize(int *Argc, char ***Argv) {
};
*Argc = std::remove_if(*Argv + 1, *Argv + *Argc, ConsumeArg) - *Argv;
- if (GrammarFile.empty()) {
- fprintf(stderr, "Fuzzer needs -grammar=/path/to/cxx.bnf\n");
- exit(1);
- }
- clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(GrammarFile, PrintForest);
+ clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(PrintForest);
return 0;
}
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
index a3e8611de4252..51fb233bf9962 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
@@ -112,6 +112,7 @@ struct GSS {
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const GSS::Node &);
// Parameters for the GLR parsing.
+// FIXME: refine it with the ParseLang struct.
struct ParseParams {
// The grammar of the language we're going to parse.
const Grammar &G;
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
new file mode 100644
index 0000000000000..60fb28a2b1664
--- /dev/null
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -0,0 +1,30 @@
+//===--- Language.h -------------------------------------------- -*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_PSEUDO_GRAMMAR_LANGUAGE_H
+#define CLANG_PSEUDO_GRAMMAR_LANGUAGE_H
+
+#include "clang-pseudo/grammar/Grammar.h"
+#include "clang-pseudo/grammar/LRTable.h"
+
+namespace clang {
+namespace pseudo {
+
+// Specify a language that can be parsed by the pseudoparser.
+struct Language {
+ Grammar G;
+ LRTable Table;
+
+ // FIXME: add clang::LangOptions.
+ // FIXME: add default start symbols.
+};
+
+} // namespace pseudo
+} // namespace clang
+
+#endif // CLANG_PSEUDO_GRAMMAR_LANGUAGE_H
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/ParseLang.h b/clang-tools-extra/pseudo/include/clang-pseudo/ParseLang.h
new file mode 100644
index 0000000000000..fdbc83c26ebea
--- /dev/null
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/ParseLang.h
@@ -0,0 +1,31 @@
+//===--- ParseLang.h ------------------------------------------- -*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_PSEUDO_PARSELANG_H
+#define CLANG_PSEUDO_PARSELANG_H
+
+#include "clang-pseudo/grammar/Grammar.h"
+#include "clang-pseudo/grammar/LRTable.h"
+
+namespace clang {
+namespace pseudo {
+
+// Specify a language that can be parsed by the pseudoparser.
+// Manifest generated from a bnf grammar file.
+struct ParseLang {
+ Grammar G;
+ LRTable Table;
+
+ // FIXME: add clang::LangOptions.
+ // FIXME: add default start symbols.
+};
+
+} // namespace pseudo
+} // namespace clang
+
+#endif // CLANG_PSEUDO_PARSELANG_H
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h b/clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h
new file mode 100644
index 0000000000000..db09aba21502f
--- /dev/null
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h
@@ -0,0 +1,35 @@
+//===--- CLI.h - Get grammar from variant sources ----------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Provides the Grammar, LRTable etc for a language specified by the `--grammar`
+// flag. It is designed to be used by pseudoparser-based CLI tools.
+//
+// The CLI library defines a `--grammar` CLI flag, which supports 1) using a
+// grammar from a file (--grammar=/path/to/lang.bnf) or 2) using the prebuilt
+// cxx language (--grammar=cxx).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_PSEUDO_CLI_CLI_H
+#define CLANG_PSEUDO_CLI_CLI_H
+
+#include "clang-pseudo/Language.h"
+
+namespace clang {
+namespace pseudo {
+
+// Returns the corresponding Language from the '--grammar' command-line flag.
+//
+// !! If the grammar flag is invalid (e.g. nonexistent file), this function will
+// exit the program immediately.
+const Language &getLanguageFromFlags();
+
+} // namespace pseudo
+} // namespace clang
+
+#endif // CLANG_PSEUDO_CLI_CLI_H
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
index 707297d3fb32d..066cfbb818abf 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
@@ -23,12 +23,11 @@
#ifndef CLANG_PSEUDO_CXX_CXX_H
#define CLANG_PSEUDO_CXX_CXX_H
+#include "clang-pseudo/Language.h"
#include "clang-pseudo/grammar/Grammar.h"
namespace clang {
namespace pseudo {
-class LRTable;
-
namespace cxx {
// Symbol represents nonterminal symbols in the C++ grammar.
// It provides a simple uniform way to access a particular nonterminal.
@@ -38,10 +37,8 @@ enum class Symbol : SymbolID {
#undef NONTERMINAL
};
-// Returns the C++ grammar.
-const Grammar &getGrammar();
-// Returns the corresponding LRTable for the C++ grammar.
-const LRTable &getLRTable();
+// Returns the Language for the cxx.bnf grammar.
+const Language &getLanguage();
} // namespace cxx
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
index 382da41397f0e..f60bf487eeaf9 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
@@ -57,6 +57,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstdint>
#include <vector>
diff --git a/clang-tools-extra/pseudo/lib/CMakeLists.txt b/clang-tools-extra/pseudo/lib/CMakeLists.txt
index f312b10f7d8b4..efcf9267e7173 100644
--- a/clang-tools-extra/pseudo/lib/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt
@@ -1,3 +1,4 @@
+add_subdirectory(cli)
add_subdirectory(cxx)
add_subdirectory(grammar)
diff --git a/clang-tools-extra/pseudo/lib/cli/CLI.cpp b/clang-tools-extra/pseudo/lib/cli/CLI.cpp
new file mode 100644
index 0000000000000..267091448a9d3
--- /dev/null
+++ b/clang-tools-extra/pseudo/lib/cli/CLI.cpp
@@ -0,0 +1,48 @@
+//===--- CLI.cpp - ----------------------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/cli/CLI.h"
+#include "clang-pseudo/cxx/CXX.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+static llvm::cl::opt<std::string> Grammar(
+ "grammar",
+ llvm::cl::desc(
+ "Specify a BNF grammar file path, or a builtin language (cxx)."),
+ llvm::cl::init("cxx"));
+
+namespace clang {
+namespace pseudo {
+
+const Language &getLanguageFromFlags() {
+ if (::Grammar == "cxx")
+ return cxx::getLanguage();
+
+ static Language *Lang = []() {
+ // Read from a bnf grammar file.
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
+ llvm::MemoryBuffer::getFile(::Grammar);
+ if (std::error_code EC = GrammarText.getError()) {
+ llvm::errs() << "Error: can't read grammar file '" << ::Grammar
+ << "': " << EC.message() << "\n";
+ std::exit(1);
+ }
+ std::vector<std::string> Diags;
+ auto G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);
+ for (const auto &Diag : Diags)
+ llvm::errs() << Diag << "\n";
+ auto Table = LRTable::buildSLR(G);
+ return new Language{std::move(G), std::move(Table)};
+ }();
+ return *Lang;
+}
+
+} // namespace pseudo
+} // namespace clang
diff --git a/clang-tools-extra/pseudo/lib/cli/CMakeLists.txt b/clang-tools-extra/pseudo/lib/cli/CMakeLists.txt
new file mode 100644
index 0000000000000..25b3d9e1425c0
--- /dev/null
+++ b/clang-tools-extra/pseudo/lib/cli/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(LLVM_LINK_COMPONENTS
+ Support
+ )
+
+add_clang_library(clangPseudoCLI
+ CLI.cpp
+
+ LINK_LIBS
+ clangPseudoGrammar
+ clangPseudoCXX
+ )
diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
index 88c2e0fe5f2ee..3c60b546cf62e 100644
--- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -7,26 +7,33 @@
//===----------------------------------------------------------------------===//
#include "clang-pseudo/cxx/CXX.h"
+#include "clang-pseudo/Language.h"
+#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRTable.h"
+#include <utility>
namespace clang {
namespace pseudo {
namespace cxx {
-
+namespace {
static const char *CXXBNF =
#include "CXXBNF.inc"
;
+} // namespace
-const Grammar &getGrammar() {
- static std::vector<std::string> Diags;
- static Grammar *G = new Grammar(Grammar::parseBNF(CXXBNF, Diags));
- assert(Diags.empty());
- return *G;
-}
-
-const LRTable &getLRTable() {
- static LRTable *Table = new LRTable(LRTable::buildSLR(getGrammar()));
- return *Table;
+const Language &getLanguage() {
+ static const auto &CXXLanguage = []() -> const Language & {
+ std::vector<std::string> Diags;
+ auto G = Grammar::parseBNF(CXXBNF, Diags);
+ assert(Diags.empty());
+ LRTable Table = LRTable::buildSLR(G);
+ const Language *PL = new Language{
+ std::move(G),
+ std::move(Table),
+ };
+ return *PL;
+ }();
+ return CXXLanguage;
}
} // namespace cxx
diff --git a/clang-tools-extra/pseudo/tool/CMakeLists.txt b/clang-tools-extra/pseudo/tool/CMakeLists.txt
index 8b47d54e766c2..7c0cd56dcf233 100644
--- a/clang-tools-extra/pseudo/tool/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/tool/CMakeLists.txt
@@ -13,5 +13,6 @@ target_link_libraries(clang-pseudo
PRIVATE
clangPseudo
clangPseudoGrammar
+ clangPseudoCLI
)
diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
index ff9b893ed3f5a..521af05d5babf 100644
--- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -9,7 +9,9 @@
#include "clang-pseudo/Bracket.h"
#include "clang-pseudo/DirectiveTree.h"
#include "clang-pseudo/GLR.h"
+#include "clang-pseudo/Language.h"
#include "clang-pseudo/Token.h"
+#include "clang-pseudo/cli/CLI.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang-pseudo/grammar/LRGraph.h"
#include "clang-pseudo/grammar/LRTable.h"
@@ -22,14 +24,11 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Signals.h"
-using clang::pseudo::Grammar;
using clang::pseudo::TokenStream;
using llvm::cl::desc;
using llvm::cl::init;
using llvm::cl::opt;
-static opt<std::string>
- Grammar("grammar", desc("Parse and check a BNF grammar file."), init(""));
static opt<bool> PrintGrammar("print-grammar", desc("Print the grammar."));
static opt<bool> PrintGraph("print-graph",
desc("Print the LR graph for the grammar"));
@@ -123,42 +122,51 @@ int main(int argc, char *argv[]) {
pairBrackets(*ParseableStream);
}
- if (Grammar.getNumOccurrences()) {
- std::string Text = readOrDie(Grammar);
- std::vector<std::string> Diags;
- auto G = Grammar::parseBNF(Text, Diags);
-
- if (!Diags.empty()) {
- llvm::errs() << llvm::join(Diags, "\n");
+ const auto &Lang = clang::pseudo::getLanguageFromFlags();
+ if (PrintGrammar)
+ llvm::outs() << Lang.G.dump();
+ if (PrintGraph)
+ llvm::outs() << clang::pseudo::LRGraph::buildLR0(Lang.G).dumpForTests(
+ Lang.G);
+
+ if (PrintTable)
+ llvm::outs() << Lang.Table.dumpForTests(Lang.G);
+ if (PrintStatistics)
+ llvm::outs() << Lang.Table.dumpStatistics();
+
+ if (ParseableStream) {
+ clang::pseudo::ForestArena Arena;
+ clang::pseudo::GSS GSS;
+ llvm::Optional<clang::pseudo::SymbolID> StartSymID =
+ Lang.G.findNonterminal(StartSymbol);
+ if (!StartSymID) {
+ llvm::errs() << llvm::formatv(
+ "The start symbol {0} doesn't exit in the grammar!\n", StartSymbol);
return 2;
}
- llvm::outs() << llvm::formatv("grammar file {0} is parsed successfully\n",
- Grammar);
- if (PrintGrammar)
- llvm::outs() << G.dump();
- if (PrintGraph)
- llvm::outs() << clang::pseudo::LRGraph::buildLR0(G).dumpForTests(G);
- auto LRTable = clang::pseudo::LRTable::buildSLR(G);
- if (PrintTable)
- llvm::outs() << LRTable.dumpForTests(G);
- if (PrintStatistics)
- llvm::outs() << LRTable.dumpStatistics();
+ auto &Root =
+ glrParse(*ParseableStream,
+ clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
+ *StartSymID);
+ if (PrintForest)
+ llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
if (ParseableStream) {
clang::pseudo::ForestArena Arena;
clang::pseudo::GSS GSS;
llvm::Optional<clang::pseudo::SymbolID> StartSymID =
- G.findNonterminal(StartSymbol);
+ Lang.G.findNonterminal(StartSymbol);
if (!StartSymID) {
llvm::errs() << llvm::formatv(
- "The start symbol {0} doesn't exit in the grammar!\n", Grammar);
+ "The start symbol {0} doesn't exit in the grammar!\n", StartSymbol);
return 2;
}
- auto &Root = glrParse(*ParseableStream,
- clang::pseudo::ParseParams{G, LRTable, Arena, GSS},
- *StartSymID);
+ auto &Root =
+ glrParse(*ParseableStream,
+ clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
+ *StartSymID);
if (PrintForest)
- llvm::outs() << Root.dumpRecursive(G, /*Abbreviated=*/true);
+ llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
if (PrintStatistics) {
llvm::outs() << "Forest bytes: " << Arena.bytes()
@@ -174,7 +182,7 @@ int main(int argc, char *argv[]) {
llvm::outs() << "\n" << Stats.Total << " " << P.first << " nodes:\n";
for (const auto &S : Stats.BySymbol)
llvm::outs() << llvm::formatv(" {0,3} {1}\n", S.second,
- G.symbolName(S.first));
+ Lang.G.symbolName(S.first));
}
}
}
diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
index f061fb3a399e3..6c5cd22bb73ee 100644
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -8,6 +8,7 @@
#include "clang-pseudo/GLR.h"
#include "clang-pseudo/Token.h"
+#include "clang-pseudo/Language.h"
#include "clang-pseudo/grammar/Grammar.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/TokenKinds.h"
@@ -48,9 +49,15 @@ class GLRTest : public ::testing::Test {
public:
void build(llvm::StringRef GrammarBNF) {
std::vector<std::string> Diags;
- G = Grammar::parseBNF(GrammarBNF, Diags);
+ TestLang.G = Grammar::parseBNF(GrammarBNF, Diags);
}
+ TokenStream emptyTokenStream() {
+ TokenStream Empty;
+ Empty.finalize();
+ return Empty;
+ }
+
void buildGrammar(std::vector<std::string> Nonterminals,
std::vector<std::string> Rules) {
Nonterminals.push_back("_");
@@ -66,19 +73,22 @@ class GLRTest : public ::testing::Test {
SymbolID id(llvm::StringRef Name) const {
for (unsigned I = 0; I < NumTerminals; ++I)
- if (G.table().Terminals[I] == Name)
+ if (TestLang.G.table().Terminals[I] == Name)
return tokenSymbol(static_cast<tok::TokenKind>(I));
- for (SymbolID ID = 0; ID < G.table().Nonterminals.size(); ++ID)
- if (G.table().Nonterminals[ID].Name == Name)
+ for (SymbolID ID = 0; ID < TestLang.G.table().Nonterminals.size(); ++ID)
+ if (TestLang.G.table().Nonterminals[ID].Name == Name)
return ID;
ADD_FAILURE() << "No such symbol found: " << Name;
return 0;
}
RuleID ruleFor(llvm::StringRef NonterminalName) const {
- auto RuleRange = G.table().Nonterminals[id(NonterminalName)].RuleRange;
+ auto RuleRange =
+ TestLang.G.table().Nonterminals[id(NonterminalName)].RuleRange;
if (RuleRange.End - RuleRange.Start == 1)
- return G.table().Nonterminals[id(NonterminalName)].RuleRange.Start;
+ return TestLang.G.table()
+ .Nonterminals[id(NonterminalName)]
+ .RuleRange.Start;
ADD_FAILURE() << "Expected a single rule for " << NonterminalName
<< ", but it has " << RuleRange.End - RuleRange.Start
<< " rule!\n";
@@ -86,7 +96,7 @@ class GLRTest : public ::testing::Test {
}
protected:
- Grammar G;
+ Language TestLang;
ForestArena Arena;
GSS GSStack;
};
@@ -112,9 +122,8 @@ TEST_F(GLRTest, ShiftMergingHeads) {
/*Parents=*/{GSSNode0});
buildGrammar({}, {}); // Create a fake empty grammar.
- LRTable T =
- LRTable::buildForTests(G, /*Entries=*/
- {
+ TestLang.Table =
+ LRTable::buildForTests(TestLang.G, /*Entries=*/{
{1, tokenSymbol(tok::semi), Action::shift(4)},
{2, tokenSymbol(tok::semi), Action::shift(4)},
{3, tokenSymbol(tok::semi), Action::shift(5)},
@@ -123,8 +132,8 @@ TEST_F(GLRTest, ShiftMergingHeads) {
ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);
std::vector<const GSS::Node *> NewHeads;
- glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal, {G, T, Arena, GSStack},
- NewHeads);
+ glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal,
+ {TestLang.G, TestLang.Table, Arena, GSStack}, NewHeads);
EXPECT_THAT(NewHeads,
UnorderedElementsAre(AllOf(state(4), parsedSymbol(&SemiTerminal),
@@ -144,8 +153,8 @@ TEST_F(GLRTest, ReduceConflictsSplitting) {
buildGrammar({"class-name", "enum-name"},
{"class-name := IDENTIFIER", "enum-name := IDENTIFIER"});
- LRTable Table = LRTable::buildForTests(
- G,
+ TestLang.Table = LRTable::buildForTests(
+ TestLang.G,
{
{/*State=*/0, id("class-name"), Action::goTo(2)},
{/*State=*/0, id("enum-name"), Action::goTo(3)},
@@ -161,7 +170,8 @@ TEST_F(GLRTest, ReduceConflictsSplitting) {
GSStack.addNode(1, &Arena.createTerminal(tok::identifier, 0), {GSSNode0});
std::vector<const GSS::Node *> Heads = {GSSNode1};
- glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
+ glrReduce(Heads, tokenSymbol(tok::eof),
+ {TestLang.G, TestLang.Table, Arena, GSStack});
EXPECT_THAT(Heads, UnorderedElementsAre(
GSSNode1,
AllOf(state(2), parsedSymbolID(id("class-name")),
@@ -192,8 +202,8 @@ TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) {
/*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1),
/*Parents=*/{GSSNode2, GSSNode3});
- LRTable Table = LRTable::buildForTests(
- G,
+ TestLang.Table = LRTable::buildForTests(
+ TestLang.G,
{
{/*State=*/2, id("ptr-operator"), Action::goTo(/*NextState=*/5)},
{/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)},
@@ -202,7 +212,7 @@ TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) {
{/*State=*/4, ruleFor("ptr-operator")},
});
std::vector<const GSS::Node *> Heads = {GSSNode4};
- glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
+ glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});
EXPECT_THAT(Heads, UnorderedElementsAre(
GSSNode4,
@@ -246,8 +256,8 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
/*Parents=*/{GSSNode2});
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
- LRTable Table = LRTable::buildForTests(
- G,
+ TestLang.Table = LRTable::buildForTests(
+ TestLang.G,
{
{/*State=*/1, id("type-name"), Action::goTo(/*NextState=*/5)},
{/*State=*/2, id("type-name"), Action::goTo(/*NextState=*/5)},
@@ -257,7 +267,7 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
{/*State=*/4, /* type-name := enum-name */ 1},
});
std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
- glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
+ glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});
// Verify that the stack heads are joint at state 5 after reduces.
EXPECT_THAT(Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
@@ -266,7 +276,7 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
parents({GSSNode1, GSSNode2}))))
<< Heads;
// Verify that we create an ambiguous ForestNode of two parses of `type-name`.
- EXPECT_EQ(Heads.back()->Payload->dumpRecursive(G),
+ EXPECT_EQ(Heads.back()->Payload->dumpRecursive(TestLang.G),
"[ 1, end) type-name := <ambiguous>\n"
"[ 1, end) ├─type-name := class-name\n"
"[ 1, end) │ └─class-name := <opaque>\n"
@@ -304,8 +314,8 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
/*Parents=*/{GSSNode2});
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
- LRTable Table =
- LRTable::buildForTests(G,
+ TestLang.Table =
+ LRTable::buildForTests(TestLang.G,
{
{/*State=*/0, id("pointer"), Action::goTo(5)},
},
@@ -314,14 +324,15 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
{4, /* pointer := enum-name */ 1},
});
std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
- glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
+ glrReduce(Heads, tokenSymbol(tok::eof),
+ {TestLang.G, TestLang.Table, Arena, GSStack});
EXPECT_THAT(
Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
AllOf(state(5), parsedSymbolID(id("pointer")),
parents({GSSNode0}))))
<< Heads;
- EXPECT_EQ(Heads.back()->Payload->dumpRecursive(G),
+ EXPECT_EQ(Heads.back()->Payload->dumpRecursive(TestLang.G),
"[ 0, end) pointer := <ambiguous>\n"
"[ 0, end) ├─pointer := class-name *\n"
"[ 0, 1) │ ├─class-name := <opaque>\n"
@@ -334,8 +345,8 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
TEST_F(GLRTest, ReduceLookahead) {
// A term can be followed by +, but not by -.
buildGrammar({"sum", "term"}, {"expr := term + term", "term := IDENTIFIER"});
- LRTable Table =
- LRTable::buildForTests(G,
+ TestLang.Table =
+ LRTable::buildForTests(TestLang.G,
{
{/*State=*/0, id("term"), Action::goTo(2)},
},
@@ -352,14 +363,14 @@ TEST_F(GLRTest, ReduceLookahead) {
// When the lookahead is +, reduce is performed.
std::vector<const GSS::Node *> Heads = {GSSNode1};
- glrReduce(Heads, tokenSymbol(tok::plus), {G, Table, Arena, GSStack});
+ glrReduce(Heads, tokenSymbol(tok::plus), {TestLang.G, TestLang.Table, Arena, GSStack});
EXPECT_THAT(Heads,
ElementsAre(GSSNode1, AllOf(state(2), parsedSymbolID(id("term")),
parents(Root))));
// When the lookahead is -, reduce is not performed.
Heads = {GSSNode1};
- glrReduce(Heads, tokenSymbol(tok::minus), {G, Table, Arena, GSStack});
+ glrReduce(Heads, tokenSymbol(tok::minus), {TestLang.G, TestLang.Table, Arena, GSStack});
EXPECT_THAT(Heads, ElementsAre(GSSNode1));
}
@@ -380,26 +391,27 @@ TEST_F(GLRTest, PerfectForestNodeSharing) {
left-paren := {
expr := IDENTIFIER
)bnf");
+ TestLang.Table = LRTable::buildSLR(TestLang.G);
clang::LangOptions LOptions;
const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions);
- auto LRTable = LRTable::buildSLR(G);
const ForestNode &Parsed =
- glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test"));
+ glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
// Verify that there is no duplicated sequence node of `expr := IDENTIFIER`
// in the forest, see the `#1` and `=#1` in the dump string.
- EXPECT_EQ(Parsed.dumpRecursive(G), "[ 0, end) test := <ambiguous>\n"
- "[ 0, end) ├─test := { expr\n"
- "[ 0, 1) │ ├─{ := tok[0]\n"
- "[ 1, end) │ └─expr := IDENTIFIER #1\n"
- "[ 1, end) │ └─IDENTIFIER := tok[1]\n"
- "[ 0, end) ├─test := { IDENTIFIER\n"
- "[ 0, 1) │ ├─{ := tok[0]\n"
- "[ 1, end) │ └─IDENTIFIER := tok[1]\n"
- "[ 0, end) └─test := left-paren expr\n"
- "[ 0, 1) ├─left-paren := {\n"
- "[ 0, 1) │ └─{ := tok[0]\n"
- "[ 1, end) └─expr =#1\n");
+ EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
+ "[ 0, end) test := <ambiguous>\n"
+ "[ 0, end) ├─test := { expr\n"
+ "[ 0, 1) │ ├─{ := tok[0]\n"
+ "[ 1, end) │ └─expr := IDENTIFIER #1\n"
+ "[ 1, end) │ └─IDENTIFIER := tok[1]\n"
+ "[ 0, end) ├─test := { IDENTIFIER\n"
+ "[ 0, 1) │ ├─{ := tok[0]\n"
+ "[ 1, end) │ └─IDENTIFIER := tok[1]\n"
+ "[ 0, end) └─test := left-paren expr\n"
+ "[ 0, 1) ├─left-paren := {\n"
+ "[ 0, 1) │ └─{ := tok[0]\n"
+ "[ 1, end) └─expr =#1\n");
}
TEST_F(GLRTest, GLRReduceOrder) {
@@ -418,16 +430,17 @@ TEST_F(GLRTest, GLRReduceOrder) {
)bnf");
clang::LangOptions LOptions;
const TokenStream &Tokens = cook(lex("IDENTIFIER", LOptions), LOptions);
- auto LRTable = LRTable::buildSLR(G);
+ TestLang.Table = LRTable::buildSLR(TestLang.G);
const ForestNode &Parsed =
- glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test"));
- EXPECT_EQ(Parsed.dumpRecursive(G), "[ 0, end) test := <ambiguous>\n"
- "[ 0, end) ├─test := IDENTIFIER\n"
- "[ 0, end) │ └─IDENTIFIER := tok[0]\n"
- "[ 0, end) └─test := foo\n"
- "[ 0, end) └─foo := IDENTIFIER\n"
- "[ 0, end) └─IDENTIFIER := tok[0]\n");
+ glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
+ EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
+ "[ 0, end) test := <ambiguous>\n"
+ "[ 0, end) ├─test := IDENTIFIER\n"
+ "[ 0, end) │ └─IDENTIFIER := tok[0]\n"
+ "[ 0, end) └─test := foo\n"
+ "[ 0, end) └─foo := IDENTIFIER\n"
+ "[ 0, end) └─IDENTIFIER := tok[0]\n");
}
TEST_F(GLRTest, NoExplicitAccept) {
@@ -442,14 +455,15 @@ TEST_F(GLRTest, NoExplicitAccept) {
// of the nonterminal `test` when the next token is `eof`, verify that the
// parser stops at the right state.
const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions);
- auto LRTable = LRTable::buildSLR(G);
+ TestLang.Table = LRTable::buildSLR(TestLang.G);
const ForestNode &Parsed =
- glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test"));
- EXPECT_EQ(Parsed.dumpRecursive(G), "[ 0, end) test := IDENTIFIER test\n"
- "[ 0, 1) ├─IDENTIFIER := tok[0]\n"
- "[ 1, end) └─test := IDENTIFIER\n"
- "[ 1, end) └─IDENTIFIER := tok[1]\n");
+ glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
+ EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
+ "[ 0, end) test := IDENTIFIER test\n"
+ "[ 0, 1) ├─IDENTIFIER := tok[0]\n"
+ "[ 1, end) └─test := IDENTIFIER\n"
+ "[ 1, end) └─IDENTIFIER := tok[1]\n");
}
TEST(GSSTest, GC) {
More information about the cfe-commits
mailing list