[clang-tools-extra] fe66aeb - [pseudo] Define a clangPseudoCLI library.

Haojian Wu via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 30 23:31:44 PDT 2022


Author: Haojian Wu
Date: 2022-07-01T08:31:34+02:00
New Revision: fe66aebd755191fac66a73b50d94c29f60be9a88

URL: https://github.com/llvm/llvm-project/commit/fe66aebd755191fac66a73b50d94c29f60be9a88
DIFF: https://github.com/llvm/llvm-project/commit/fe66aebd755191fac66a73b50d94c29f60be9a88.diff

LOG: [pseudo] Define a clangPseudoCLI library.

- define a common data structure Language which is a compiled result of the
  bnf grammar. It is defined in Language.h;
- create a clangPseudoCLI lib which defines a grammar command-line flag and
  exposes a function to get the Language. It supports --grammar=cxx and
  --grammar=/path/to/file.bnf;
- use the clangPseudoCLI in clang-pseudo, fuzzer, and benchmark tools
  (simplify the code and use the prebuilt cxx grammar);

Split out from https://reviews.llvm.org/D127448.

Differential Revision: https://reviews.llvm.org/D128679

Added: 
    clang-tools-extra/pseudo/include/clang-pseudo/Language.h
    clang-tools-extra/pseudo/include/clang-pseudo/ParseLang.h
    clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h
    clang-tools-extra/pseudo/lib/cli/CLI.cpp
    clang-tools-extra/pseudo/lib/cli/CMakeLists.txt

Modified: 
    clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
    clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
    clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
    clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
    clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
    clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
    clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
    clang-tools-extra/pseudo/lib/CMakeLists.txt
    clang-tools-extra/pseudo/lib/cxx/CXX.cpp
    clang-tools-extra/pseudo/tool/CMakeLists.txt
    clang-tools-extra/pseudo/tool/ClangPseudo.cpp
    clang-tools-extra/pseudo/unittests/GLRTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
index a06fde7d7149c..5bf6ff2c08f32 100644
--- a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
+++ b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
@@ -25,6 +25,7 @@
 #include "clang-pseudo/Forest.h"
 #include "clang-pseudo/GLR.h"
 #include "clang-pseudo/Token.h"
+#include "clang-pseudo/cli/CLI.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
 #include "clang/Basic/LangOptions.h"
@@ -39,9 +40,6 @@ using llvm::cl::desc;
 using llvm::cl::opt;
 using llvm::cl::Required;
 
-static opt<std::string> GrammarFile("grammar",
-                                    desc("Parse and check a BNF grammar file."),
-                                    Required);
 static opt<std::string> Source("source", desc("Source file"), Required);
 
 namespace clang {
@@ -49,11 +47,10 @@ namespace pseudo {
 namespace bench {
 namespace {
 
-const std::string *GrammarText = nullptr;
 const std::string *SourceText = nullptr;
-const Grammar *G = nullptr;
+const Language *Lang = nullptr;
 
-void setupGrammarAndSource() {
+void setup() {
   auto ReadFile = [](llvm::StringRef FilePath) -> std::string {
     llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
         llvm::MemoryBuffer::getFile(FilePath);
@@ -64,22 +61,13 @@ void setupGrammarAndSource() {
     }
     return GrammarText.get()->getBuffer().str();
   };
-  GrammarText = new std::string(ReadFile(GrammarFile));
   SourceText = new std::string(ReadFile(Source));
-  std::vector<std::string> Diags;
-  G = new Grammar(Grammar::parseBNF(*GrammarText, Diags));
+  Lang = &getLanguageFromFlags();
 }
 
-static void parseBNF(benchmark::State &State) {
-  std::vector<std::string> Diags;
-  for (auto _ : State)
-    Grammar::parseBNF(*GrammarText, Diags);
-}
-BENCHMARK(parseBNF);
-
 static void buildSLR(benchmark::State &State) {
   for (auto _ : State)
-    LRTable::buildSLR(*G);
+    LRTable::buildSLR(Lang->G);
 }
 BENCHMARK(buildSLR);
 
@@ -129,13 +117,13 @@ static void preprocess(benchmark::State &State) {
 BENCHMARK(preprocess);
 
 static void glrParse(benchmark::State &State) {
-  LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
-  SymbolID StartSymbol = *G->findNonterminal("translation-unit");
+  SymbolID StartSymbol = *Lang->G->findNonterminal("translation-unit");
   TokenStream Stream = lexAndPreprocess();
   for (auto _ : State) {
     pseudo::ForestArena Forest;
     pseudo::GSS GSS;
-    pseudo::glrParse(Stream, ParseParams{*G, Table, Forest, GSS}, StartSymbol);
+    pseudo::glrParse(Stream, ParseParams{*Lang->G, Lang->Table, Forest, GSS},
+                     StartSymbol);
   }
   State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
                           SourceText->size());
@@ -143,13 +131,13 @@ static void glrParse(benchmark::State &State) {
 BENCHMARK(glrParse);
 
 static void full(benchmark::State &State) {
-  LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
-  SymbolID StartSymbol = *G->findNonterminal("translation-unit");
+  SymbolID StartSymbol = *Lang->G.findNonterminal("translation-unit");
   for (auto _ : State) {
     TokenStream Stream = lexAndPreprocess();
     pseudo::ForestArena Forest;
     pseudo::GSS GSS;
-    pseudo::glrParse(lexAndPreprocess(), ParseParams{*G, Table, Forest, GSS},
+    pseudo::glrParse(lexAndPreprocess(),
+                     ParseParams{Lang->G, Lang->Table, Forest, GSS},
                      StartSymbol);
   }
   State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
@@ -165,7 +153,7 @@ BENCHMARK(full);
 int main(int argc, char *argv[]) {
   benchmark::Initialize(&argc, argv);
   llvm::cl::ParseCommandLineOptions(argc, argv);
-  clang::pseudo::bench::setupGrammarAndSource();
+  clang::pseudo::bench::setup();
   benchmark::RunSpecifiedBenchmarks();
   return 0;
 }

diff  --git a/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt b/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
index b088f8f7c68b8..859db991403cd 100644
--- a/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/benchmarks/CMakeLists.txt
@@ -3,6 +3,7 @@ add_benchmark(ClangPseudoBenchmark Benchmark.cpp)
 target_link_libraries(ClangPseudoBenchmark
   PRIVATE
   clangPseudo
+  clangPseudoCLI
   clangPseudoGrammar
   LLVMSupport
   )

diff  --git a/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt b/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
index e5061cee3bf9d..e1d79873471f0 100644
--- a/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
@@ -11,5 +11,6 @@ add_llvm_fuzzer(clang-pseudo-fuzzer
 target_link_libraries(clang-pseudo-fuzzer
   PRIVATE
   clangPseudo
+  clangPseudoCLI
   clangPseudoGrammar
   )

diff  --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
index d56ccd018df0d..ea1606b330be3 100644
--- a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
+++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
@@ -10,6 +10,7 @@
 #include "clang-pseudo/Forest.h"
 #include "clang-pseudo/GLR.h"
 #include "clang-pseudo/Token.h"
+#include "clang-pseudo/cli/CLI.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
 #include "clang/Basic/LangOptions.h"
@@ -24,28 +25,10 @@ namespace {
 
 class Fuzzer {
   clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();
-  Grammar G;
-  LRTable T;
   bool Print;
 
 public:
-  Fuzzer(llvm::StringRef GrammarPath, bool Print) : Print(Print) {
-    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
-        llvm::MemoryBuffer::getFile(GrammarPath);
-    if (std::error_code EC = GrammarText.getError()) {
-      llvm::errs() << "Error: can't read grammar file '" << GrammarPath
-                   << "': " << EC.message() << "\n";
-      std::exit(1);
-    }
-    std::vector<std::string> Diags;
-    G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);
-    if (!Diags.empty()) {
-      for (const auto &Diag : Diags)
-        llvm::errs() << Diag << "\n";
-      std::exit(1);
-    }
-    T = LRTable::buildSLR(G);
-  }
+  Fuzzer(bool Print) : Print(Print) {}
 
   void operator()(llvm::StringRef Code) {
     std::string CodeStr = Code.str(); // Must be null-terminated.
@@ -58,11 +41,13 @@ class Fuzzer {
 
     clang::pseudo::ForestArena Arena;
     clang::pseudo::GSS GSS;
+    const Language &Lang = getLanguageFromFlags();
     auto &Root =
-        glrParse(ParseableStream, clang::pseudo::ParseParams{G, T, Arena, GSS},
-                 *G.findNonterminal("translation-unit"));
+        glrParse(ParseableStream,
+                 clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
+                 *Lang.G.findNonterminal("translation-unit"));
     if (Print)
-      llvm::outs() << Root.dumpRecursive(G);
+      llvm::outs() << Root.dumpRecursive(Lang.G);
   }
 };
 
@@ -75,16 +60,11 @@ Fuzzer *Fuzz = nullptr;
 extern "C" {
 
 // Set up the fuzzer from command line flags:
-//  -grammar=<file> (required) - path to cxx.bnf
 //  -print                     - used for testing the fuzzer
 int LLVMFuzzerInitialize(int *Argc, char ***Argv) {
-  llvm::StringRef GrammarFile;
   bool PrintForest = false;
   auto ConsumeArg = [&](llvm::StringRef Arg) -> bool {
-    if (Arg.consume_front("-grammar=")) {
-      GrammarFile = Arg;
-      return true;
-    } else if (Arg == "-print") {
+    if (Arg == "-print") {
       PrintForest = true;
       return true;
     }
@@ -92,11 +72,7 @@ int LLVMFuzzerInitialize(int *Argc, char ***Argv) {
   };
   *Argc = std::remove_if(*Argv + 1, *Argv + *Argc, ConsumeArg) - *Argv;
 
-  if (GrammarFile.empty()) {
-    fprintf(stderr, "Fuzzer needs -grammar=/path/to/cxx.bnf\n");
-    exit(1);
-  }
-  clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(GrammarFile, PrintForest);
+  clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(PrintForest);
   return 0;
 }
 

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
index a3e8611de4252..51fb233bf9962 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
@@ -112,6 +112,7 @@ struct GSS {
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const GSS::Node &);
 
 // Parameters for the GLR parsing.
+// FIXME: refine it with the ParseLang struct.
 struct ParseParams {
   // The grammar of the language we're going to parse.
   const Grammar &G;

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
new file mode 100644
index 0000000000000..60fb28a2b1664
--- /dev/null
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -0,0 +1,30 @@
+//===--- Language.h -------------------------------------------- -*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_PSEUDO_GRAMMAR_LANGUAGE_H
+#define CLANG_PSEUDO_GRAMMAR_LANGUAGE_H
+
+#include "clang-pseudo/grammar/Grammar.h"
+#include "clang-pseudo/grammar/LRTable.h"
+
+namespace clang {
+namespace pseudo {
+
+// Specify a language that can be parsed by the pseduoparser.
+struct Language {
+  Grammar G;
+  LRTable Table;
+
+  // FIXME: add clang::LangOptions.
+  // FIXME: add default start symbols.
+};
+
+} // namespace pseudo
+} // namespace clang
+
+#endif // CLANG_PSEUDO_GRAMMAR_LANGUAGE_H

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/ParseLang.h b/clang-tools-extra/pseudo/include/clang-pseudo/ParseLang.h
new file mode 100644
index 0000000000000..fdbc83c26ebea
--- /dev/null
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/ParseLang.h
@@ -0,0 +1,31 @@
+//===--- ParseLang.h ------------------------------------------- -*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_PSEUDO_PARSELANG_H
+#define CLANG_PSEUDO_PARSELANG_H
+
+#include "clang-pseudo/grammar/Grammar.h"
+#include "clang-pseudo/grammar/LRTable.h"
+
+namespace clang {
+namespace pseudo {
+
+// Specify a language that can be parsed by the pseduoparser.
+// Manifest generated from a bnf grammar file.
+struct ParseLang {
+  Grammar G;
+  LRTable Table;
+
+  // FIXME: add clang::LangOptions.
+  // FIXME: add default start symbols.
+};
+
+} // namespace pseudo
+} // namespace clang
+
+#endif // CLANG_PSEUDO_PARSELANG_H

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h b/clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h
new file mode 100644
index 0000000000000..db09aba21502f
--- /dev/null
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/cli/CLI.h
@@ -0,0 +1,35 @@
+//===--- CLI.h - Get grammar from variant sources ----------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Provides the Grammar, LRTable etc for a language specified by the `--grammar`
+// flags. It is by design to be used by pseudoparser-based CLI tools.
+//
+// The CLI library defines a `--grammar` CLI flag, which supports 1) using a
+// grammar from a file (--grammar=/path/to/lang.bnf) or using the prebuilt cxx
+// language (--grammar=cxx).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_PSEUDO_CLI_CLI_H
+#define CLANG_PSEUDO_CLI_CLI_H
+
+#include "clang-pseudo/Language.h"
+
+namespace clang {
+namespace pseudo {
+
+// Returns the corresponding Language from the '--grammar' command-line flag.
+//
+// !! If the grammar flag is invalid (e.g. unexisting file), this function will
+// exit the program immediately.
+const Language &getLanguageFromFlags();
+
+} // namespace pseudo
+} // namespace clang
+
+#endif // CLANG_PSEUDO_CLI_CLI_H

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
index 707297d3fb32d..066cfbb818abf 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h
@@ -23,12 +23,11 @@
 #ifndef CLANG_PSEUDO_CXX_CXX_H
 #define CLANG_PSEUDO_CXX_CXX_H
 
+#include "clang-pseudo/Language.h"
 #include "clang-pseudo/grammar/Grammar.h"
 
 namespace clang {
 namespace pseudo {
-class LRTable;
-
 namespace cxx {
 // Symbol represents nonterminal symbols in the C++ grammar.
 // It provides a simple uniform way to access a particular nonterminal.
@@ -38,10 +37,8 @@ enum class Symbol : SymbolID {
 #undef NONTERMINAL
 };
 
-// Returns the C++ grammar.
-const Grammar &getGrammar();
-// Returns the corresponding LRTable for the C++ grammar.
-const LRTable &getLRTable();
+// Returns the Language for the cxx.bnf grammar.
+const Language &getLanguage();
 
 } // namespace cxx
 

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
index 382da41397f0e..f60bf487eeaf9 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h
@@ -57,6 +57,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cstdint>
 #include <vector>
 

diff  --git a/clang-tools-extra/pseudo/lib/CMakeLists.txt b/clang-tools-extra/pseudo/lib/CMakeLists.txt
index f312b10f7d8b4..efcf9267e7173 100644
--- a/clang-tools-extra/pseudo/lib/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/lib/CMakeLists.txt
@@ -1,3 +1,4 @@
+add_subdirectory(cli)
 add_subdirectory(cxx)
 add_subdirectory(grammar)
 

diff  --git a/clang-tools-extra/pseudo/lib/cli/CLI.cpp b/clang-tools-extra/pseudo/lib/cli/CLI.cpp
new file mode 100644
index 0000000000000..267091448a9d3
--- /dev/null
+++ b/clang-tools-extra/pseudo/lib/cli/CLI.cpp
@@ -0,0 +1,48 @@
+//===--- CLI.cpp -  ----------------------------------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/cli/CLI.h"
+#include "clang-pseudo/cxx/CXX.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+static llvm::cl::opt<std::string> Grammar(
+    "grammar",
+    llvm::cl::desc(
+        "Specify a BNF grammar file path, or a builtin language (cxx)."),
+    llvm::cl::init("cxx"));
+
+namespace clang {
+namespace pseudo {
+
+const Language &getLanguageFromFlags() {
+  if (::Grammar == "cxx")
+    return cxx::getLanguage();
+
+  static Language *Lang = []() {
+    // Read from a bnf grammar file.
+    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
+        llvm::MemoryBuffer::getFile(::Grammar);
+    if (std::error_code EC = GrammarText.getError()) {
+      llvm::errs() << "Error: can't read grammar file '" << ::Grammar
+                   << "': " << EC.message() << "\n";
+      std::exit(1);
+    }
+    std::vector<std::string> Diags;
+    auto G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);
+    for (const auto &Diag : Diags)
+      llvm::errs() << Diag << "\n";
+    auto Table = LRTable::buildSLR(G);
+    return new Language{std::move(G), std::move(Table)};
+  }();
+  return *Lang;
+}
+
+} // namespace pseudo
+} // namespace clang

diff  --git a/clang-tools-extra/pseudo/lib/cli/CMakeLists.txt b/clang-tools-extra/pseudo/lib/cli/CMakeLists.txt
new file mode 100644
index 0000000000000..25b3d9e1425c0
--- /dev/null
+++ b/clang-tools-extra/pseudo/lib/cli/CMakeLists.txt
@@ -0,0 +1,11 @@
+set(LLVM_LINK_COMPONENTS
+  Support
+  )
+
+add_clang_library(clangPseudoCLI
+  CLI.cpp
+
+  LINK_LIBS
+  clangPseudoGrammar
+  clangPseudoCXX
+  )

diff  --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
index 88c2e0fe5f2ee..3c60b546cf62e 100644
--- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -7,26 +7,33 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang-pseudo/cxx/CXX.h"
+#include "clang-pseudo/Language.h"
+#include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRTable.h"
+#include <utility>
 
 namespace clang {
 namespace pseudo {
 namespace cxx {
-
+namespace {
 static const char *CXXBNF =
 #include "CXXBNF.inc"
     ;
+} // namespace
 
-const Grammar &getGrammar() {
-  static std::vector<std::string> Diags;
-  static Grammar *G = new Grammar(Grammar::parseBNF(CXXBNF, Diags));
-  assert(Diags.empty());
-  return *G;
-}
-
-const LRTable &getLRTable() {
-  static LRTable *Table = new LRTable(LRTable::buildSLR(getGrammar()));
-  return *Table;
+const Language &getLanguage() {
+  static const auto &CXXLanguage = []() -> const Language & {
+    std::vector<std::string> Diags;
+    auto G = Grammar::parseBNF(CXXBNF, Diags);
+    assert(Diags.empty());
+    LRTable Table = LRTable::buildSLR(G);
+    const Language *PL = new Language{
+        std::move(G),
+        std::move(Table),
+    };
+    return *PL;
+  }();
+  return CXXLanguage;
 }
 
 } // namespace cxx

diff  --git a/clang-tools-extra/pseudo/tool/CMakeLists.txt b/clang-tools-extra/pseudo/tool/CMakeLists.txt
index 8b47d54e766c2..7c0cd56dcf233 100644
--- a/clang-tools-extra/pseudo/tool/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/tool/CMakeLists.txt
@@ -13,5 +13,6 @@ target_link_libraries(clang-pseudo
   PRIVATE
   clangPseudo
   clangPseudoGrammar
+  clangPseudoCLI
   )
 

diff  --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
index ff9b893ed3f5a..521af05d5babf 100644
--- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -9,7 +9,9 @@
 #include "clang-pseudo/Bracket.h"
 #include "clang-pseudo/DirectiveTree.h"
 #include "clang-pseudo/GLR.h"
+#include "clang-pseudo/Language.h"
 #include "clang-pseudo/Token.h"
+#include "clang-pseudo/cli/CLI.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang-pseudo/grammar/LRGraph.h"
 #include "clang-pseudo/grammar/LRTable.h"
@@ -22,14 +24,11 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Signals.h"
 
-using clang::pseudo::Grammar;
 using clang::pseudo::TokenStream;
 using llvm::cl::desc;
 using llvm::cl::init;
 using llvm::cl::opt;
 
-static opt<std::string>
-    Grammar("grammar", desc("Parse and check a BNF grammar file."), init(""));
 static opt<bool> PrintGrammar("print-grammar", desc("Print the grammar."));
 static opt<bool> PrintGraph("print-graph",
                             desc("Print the LR graph for the grammar"));
@@ -123,42 +122,51 @@ int main(int argc, char *argv[]) {
     pairBrackets(*ParseableStream);
   }
 
-  if (Grammar.getNumOccurrences()) {
-    std::string Text = readOrDie(Grammar);
-    std::vector<std::string> Diags;
-    auto G = Grammar::parseBNF(Text, Diags);
-
-    if (!Diags.empty()) {
-      llvm::errs() << llvm::join(Diags, "\n");
+  const auto &Lang = clang::pseudo::getLanguageFromFlags();
+  if (PrintGrammar)
+    llvm::outs() << Lang.G.dump();
+  if (PrintGraph)
+    llvm::outs() << clang::pseudo::LRGraph::buildLR0(Lang.G).dumpForTests(
+        Lang.G);
+
+  if (PrintTable)
+    llvm::outs() << Lang.Table.dumpForTests(Lang.G);
+  if (PrintStatistics)
+    llvm::outs() << Lang.Table.dumpStatistics();
+
+  if (ParseableStream) {
+    clang::pseudo::ForestArena Arena;
+    clang::pseudo::GSS GSS;
+    llvm::Optional<clang::pseudo::SymbolID> StartSymID =
+        Lang.G.findNonterminal(StartSymbol);
+    if (!StartSymID) {
+      llvm::errs() << llvm::formatv(
+          "The start symbol {0} doesn't exit in the grammar!\n", StartSymbol);
       return 2;
     }
-    llvm::outs() << llvm::formatv("grammar file {0} is parsed successfully\n",
-                                  Grammar);
-    if (PrintGrammar)
-      llvm::outs() << G.dump();
-    if (PrintGraph)
-      llvm::outs() << clang::pseudo::LRGraph::buildLR0(G).dumpForTests(G);
-    auto LRTable = clang::pseudo::LRTable::buildSLR(G);
-    if (PrintTable)
-      llvm::outs() << LRTable.dumpForTests(G);
-    if (PrintStatistics)
-      llvm::outs() << LRTable.dumpStatistics();
+    auto &Root =
+        glrParse(*ParseableStream,
+                 clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
+                 *StartSymID);
+    if (PrintForest)
+      llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
 
     if (ParseableStream) {
       clang::pseudo::ForestArena Arena;
       clang::pseudo::GSS GSS;
       llvm::Optional<clang::pseudo::SymbolID> StartSymID =
-          G.findNonterminal(StartSymbol);
+          Lang.G.findNonterminal(StartSymbol);
       if (!StartSymID) {
         llvm::errs() << llvm::formatv(
-            "The start symbol {0} doesn't exit in the grammar!\n", Grammar);
+            "The start symbol {0} doesn't exit in the grammar!\n", StartSymbol);
         return 2;
       }
-      auto &Root = glrParse(*ParseableStream,
-                            clang::pseudo::ParseParams{G, LRTable, Arena, GSS},
-                            *StartSymID);
+      auto &Root =
+          glrParse(*ParseableStream,
+                   clang::pseudo::ParseParams{Lang.G, Lang.Table, Arena, GSS},
+                   *StartSymID);
       if (PrintForest)
-        llvm::outs() << Root.dumpRecursive(G, /*Abbreviated=*/true);
+        llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true);
 
       if (PrintStatistics) {
         llvm::outs() << "Forest bytes: " << Arena.bytes()
@@ -174,7 +182,7 @@ int main(int argc, char *argv[]) {
           llvm::outs() << "\n" << Stats.Total << " " << P.first << " nodes:\n";
           for (const auto &S : Stats.BySymbol)
             llvm::outs() << llvm::formatv("  {0,3} {1}\n", S.second,
-                                          G.symbolName(S.first));
+                                          Lang.G.symbolName(S.first));
         }
       }
     }

diff  --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
index f061fb3a399e3..6c5cd22bb73ee 100644
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -8,6 +8,7 @@
 
 #include "clang-pseudo/GLR.h"
 #include "clang-pseudo/Token.h"
+#include "clang-pseudo/Language.h"
 #include "clang-pseudo/grammar/Grammar.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TokenKinds.h"
@@ -48,9 +49,15 @@ class GLRTest : public ::testing::Test {
 public:
   void build(llvm::StringRef GrammarBNF) {
     std::vector<std::string> Diags;
-    G = Grammar::parseBNF(GrammarBNF, Diags);
+    TestLang.G = Grammar::parseBNF(GrammarBNF, Diags);
   }
 
+  TokenStream emptyTokenStream() {
+    TokenStream Empty;
+    Empty.finalize();
+    return Empty;
+  }
+ 
   void buildGrammar(std::vector<std::string> Nonterminals,
                     std::vector<std::string> Rules) {
     Nonterminals.push_back("_");
@@ -66,19 +73,22 @@ class GLRTest : public ::testing::Test {
 
   SymbolID id(llvm::StringRef Name) const {
     for (unsigned I = 0; I < NumTerminals; ++I)
-      if (G.table().Terminals[I] == Name)
+      if (TestLang.G.table().Terminals[I] == Name)
         return tokenSymbol(static_cast<tok::TokenKind>(I));
-    for (SymbolID ID = 0; ID < G.table().Nonterminals.size(); ++ID)
-      if (G.table().Nonterminals[ID].Name == Name)
+    for (SymbolID ID = 0; ID < TestLang.G.table().Nonterminals.size(); ++ID)
+      if (TestLang.G.table().Nonterminals[ID].Name == Name)
         return ID;
     ADD_FAILURE() << "No such symbol found: " << Name;
     return 0;
   }
 
   RuleID ruleFor(llvm::StringRef NonterminalName) const {
-    auto RuleRange = G.table().Nonterminals[id(NonterminalName)].RuleRange;
+    auto RuleRange =
+        TestLang.G.table().Nonterminals[id(NonterminalName)].RuleRange;
     if (RuleRange.End - RuleRange.Start == 1)
-      return G.table().Nonterminals[id(NonterminalName)].RuleRange.Start;
+      return TestLang.G.table()
+          .Nonterminals[id(NonterminalName)]
+          .RuleRange.Start;
     ADD_FAILURE() << "Expected a single rule for " << NonterminalName
                   << ", but it has " << RuleRange.End - RuleRange.Start
                   << " rule!\n";
@@ -86,7 +96,7 @@ class GLRTest : public ::testing::Test {
   }
 
 protected:
-  Grammar G;
+  Language TestLang;
   ForestArena Arena;
   GSS GSStack;
 };
@@ -112,9 +122,8 @@ TEST_F(GLRTest, ShiftMergingHeads) {
                                    /*Parents=*/{GSSNode0});
 
   buildGrammar({}, {}); // Create a fake empty grammar.
-  LRTable T =
-      LRTable::buildForTests(G, /*Entries=*/
-                             {
+  TestLang.Table =
+      LRTable::buildForTests(TestLang.G, /*Entries=*/{
                                  {1, tokenSymbol(tok::semi), Action::shift(4)},
                                  {2, tokenSymbol(tok::semi), Action::shift(4)},
                                  {3, tokenSymbol(tok::semi), Action::shift(5)},
@@ -123,8 +132,8 @@ TEST_F(GLRTest, ShiftMergingHeads) {
 
   ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);
   std::vector<const GSS::Node *> NewHeads;
-  glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal, {G, T, Arena, GSStack},
-           NewHeads);
+  glrShift({GSSNode1, GSSNode2, GSSNode3}, SemiTerminal,
+           {TestLang.G, TestLang.Table, Arena, GSStack}, NewHeads);
 
   EXPECT_THAT(NewHeads,
               UnorderedElementsAre(AllOf(state(4), parsedSymbol(&SemiTerminal),
@@ -144,8 +153,8 @@ TEST_F(GLRTest, ReduceConflictsSplitting) {
   buildGrammar({"class-name", "enum-name"},
                {"class-name := IDENTIFIER", "enum-name := IDENTIFIER"});
 
-  LRTable Table = LRTable::buildForTests(
-      G,
+  TestLang.Table = LRTable::buildForTests(
+      TestLang.G,
       {
           {/*State=*/0, id("class-name"), Action::goTo(2)},
           {/*State=*/0, id("enum-name"), Action::goTo(3)},
@@ -161,7 +170,8 @@ TEST_F(GLRTest, ReduceConflictsSplitting) {
       GSStack.addNode(1, &Arena.createTerminal(tok::identifier, 0), {GSSNode0});
 
   std::vector<const GSS::Node *> Heads = {GSSNode1};
-  glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
+  glrReduce(Heads, tokenSymbol(tok::eof),
+            {TestLang.G, TestLang.Table, Arena, GSStack});
   EXPECT_THAT(Heads, UnorderedElementsAre(
                          GSSNode1,
                          AllOf(state(2), parsedSymbolID(id("class-name")),
@@ -192,8 +202,8 @@ TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) {
       /*State=*/4, &Arena.createTerminal(tok::star, /*TokenIndex=*/1),
       /*Parents=*/{GSSNode2, GSSNode3});
 
-  LRTable Table = LRTable::buildForTests(
-      G,
+  TestLang.Table = LRTable::buildForTests(
+      TestLang.G,
       {
           {/*State=*/2, id("ptr-operator"), Action::goTo(/*NextState=*/5)},
           {/*State=*/3, id("ptr-operator"), Action::goTo(/*NextState=*/6)},
@@ -202,7 +212,7 @@ TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) {
           {/*State=*/4, ruleFor("ptr-operator")},
       });
   std::vector<const GSS::Node *> Heads = {GSSNode4};
-  glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
+  glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});
 
   EXPECT_THAT(Heads, UnorderedElementsAre(
                          GSSNode4,
@@ -246,8 +256,8 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
                       /*Parents=*/{GSSNode2});
 
   // FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
-  LRTable Table = LRTable::buildForTests(
-      G,
+  TestLang.Table = LRTable::buildForTests(
+      TestLang.G,
       {
           {/*State=*/1, id("type-name"), Action::goTo(/*NextState=*/5)},
           {/*State=*/2, id("type-name"), Action::goTo(/*NextState=*/5)},
@@ -257,7 +267,7 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
           {/*State=*/4, /* type-name := enum-name */ 1},
       });
   std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
-  glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
+  glrReduce(Heads, tokenSymbol(tok::eof), {TestLang.G, TestLang.Table, Arena, GSStack});
 
   // Verify that the stack heads are joint at state 5 after reduces.
   EXPECT_THAT(Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
@@ -266,7 +276,7 @@ TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
                                                 parents({GSSNode1, GSSNode2}))))
       << Heads;
   // Verify that we create an ambiguous ForestNode of two parses of `type-name`.
-  EXPECT_EQ(Heads.back()->Payload->dumpRecursive(G),
+  EXPECT_EQ(Heads.back()->Payload->dumpRecursive(TestLang.G),
             "[  1, end) type-name := <ambiguous>\n"
             "[  1, end) ├─type-name := class-name\n"
             "[  1, end) │ └─class-name := <opaque>\n"
@@ -304,8 +314,8 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
                       /*Parents=*/{GSSNode2});
 
   // FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
-  LRTable Table =
-      LRTable::buildForTests(G,
+  TestLang.Table =
+      LRTable::buildForTests(TestLang.G,
                              {
                                  {/*State=*/0, id("pointer"), Action::goTo(5)},
                              },
@@ -314,14 +324,15 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
                                  {4, /* pointer := enum-name */ 1},
                              });
   std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
-  glrReduce(Heads, tokenSymbol(tok::eof), {G, Table, Arena, GSStack});
+  glrReduce(Heads, tokenSymbol(tok::eof),
+            {TestLang.G, TestLang.Table, Arena, GSStack});
 
   EXPECT_THAT(
       Heads, UnorderedElementsAre(GSSNode3, GSSNode4,
                                   AllOf(state(5), parsedSymbolID(id("pointer")),
                                         parents({GSSNode0}))))
       << Heads;
-  EXPECT_EQ(Heads.back()->Payload->dumpRecursive(G),
+  EXPECT_EQ(Heads.back()->Payload->dumpRecursive(TestLang.G),
             "[  0, end) pointer := <ambiguous>\n"
             "[  0, end) ├─pointer := class-name *\n"
             "[  0,   1) │ ├─class-name := <opaque>\n"
@@ -334,8 +345,8 @@ TEST_F(GLRTest, ReduceJoiningWithSameBase) {
 TEST_F(GLRTest, ReduceLookahead) {
   // A term can be followed by +, but not by -.
   buildGrammar({"sum", "term"}, {"expr := term + term", "term := IDENTIFIER"});
-  LRTable Table =
-      LRTable::buildForTests(G,
+  TestLang.Table =
+      LRTable::buildForTests(TestLang.G,
                              {
                                  {/*State=*/0, id("term"), Action::goTo(2)},
                              },
@@ -352,14 +363,14 @@ TEST_F(GLRTest, ReduceLookahead) {
 
   // When the lookahead is +, reduce is performed.
   std::vector<const GSS::Node *> Heads = {GSSNode1};
-  glrReduce(Heads, tokenSymbol(tok::plus), {G, Table, Arena, GSStack});
+  glrReduce(Heads, tokenSymbol(tok::plus), {TestLang.G, TestLang.Table, Arena, GSStack});
   EXPECT_THAT(Heads,
               ElementsAre(GSSNode1, AllOf(state(2), parsedSymbolID(id("term")),
                                           parents(Root))));
 
   // When the lookahead is -, reduce is not performed.
   Heads = {GSSNode1};
-  glrReduce(Heads, tokenSymbol(tok::minus), {G, Table, Arena, GSStack});
+  glrReduce(Heads, tokenSymbol(tok::minus), {TestLang.G, TestLang.Table, Arena, GSStack});
   EXPECT_THAT(Heads, ElementsAre(GSSNode1));
 }
 
@@ -380,26 +391,27 @@ TEST_F(GLRTest, PerfectForestNodeSharing) {
     left-paren := {
     expr := IDENTIFIER
   )bnf");
+  TestLang.Table = LRTable::buildSLR(TestLang.G);
   clang::LangOptions LOptions;
   const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions);
-  auto LRTable = LRTable::buildSLR(G);
 
   const ForestNode &Parsed =
-      glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test"));
+      glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
   // Verify that there is no duplicated sequence node of `expr := IDENTIFIER`
   // in the forest, see the `#1` and `=#1` in the dump string.
-  EXPECT_EQ(Parsed.dumpRecursive(G), "[  0, end) test := <ambiguous>\n"
-                                     "[  0, end) ├─test := { expr\n"
-                                     "[  0,   1) │ ├─{ := tok[0]\n"
-                                     "[  1, end) │ └─expr := IDENTIFIER #1\n"
-                                     "[  1, end) │   └─IDENTIFIER := tok[1]\n"
-                                     "[  0, end) ├─test := { IDENTIFIER\n"
-                                     "[  0,   1) │ ├─{ := tok[0]\n"
-                                     "[  1, end) │ └─IDENTIFIER := tok[1]\n"
-                                     "[  0, end) └─test := left-paren expr\n"
-                                     "[  0,   1)   ├─left-paren := {\n"
-                                     "[  0,   1)   │ └─{ := tok[0]\n"
-                                     "[  1, end)   └─expr =#1\n");
+  EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
+            "[  0, end) test := <ambiguous>\n"
+            "[  0, end) ├─test := { expr\n"
+            "[  0,   1) │ ├─{ := tok[0]\n"
+            "[  1, end) │ └─expr := IDENTIFIER #1\n"
+            "[  1, end) │   └─IDENTIFIER := tok[1]\n"
+            "[  0, end) ├─test := { IDENTIFIER\n"
+            "[  0,   1) │ ├─{ := tok[0]\n"
+            "[  1, end) │ └─IDENTIFIER := tok[1]\n"
+            "[  0, end) └─test := left-paren expr\n"
+            "[  0,   1)   ├─left-paren := {\n"
+            "[  0,   1)   │ └─{ := tok[0]\n"
+            "[  1, end)   └─expr =#1\n");
 }
 
 TEST_F(GLRTest, GLRReduceOrder) {
@@ -418,16 +430,17 @@ TEST_F(GLRTest, GLRReduceOrder) {
   )bnf");
   clang::LangOptions LOptions;
   const TokenStream &Tokens = cook(lex("IDENTIFIER", LOptions), LOptions);
-  auto LRTable = LRTable::buildSLR(G);
+  TestLang.Table = LRTable::buildSLR(TestLang.G);
 
   const ForestNode &Parsed =
-      glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test"));
-  EXPECT_EQ(Parsed.dumpRecursive(G), "[  0, end) test := <ambiguous>\n"
-                                     "[  0, end) ├─test := IDENTIFIER\n"
-                                     "[  0, end) │ └─IDENTIFIER := tok[0]\n"
-                                     "[  0, end) └─test := foo\n"
-                                     "[  0, end)   └─foo := IDENTIFIER\n"
-                                     "[  0, end)     └─IDENTIFIER := tok[0]\n");
+      glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
+  EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
+            "[  0, end) test := <ambiguous>\n"
+            "[  0, end) ├─test := IDENTIFIER\n"
+            "[  0, end) │ └─IDENTIFIER := tok[0]\n"
+            "[  0, end) └─test := foo\n"
+            "[  0, end)   └─foo := IDENTIFIER\n"
+            "[  0, end)     └─IDENTIFIER := tok[0]\n");
 }
 
 TEST_F(GLRTest, NoExplicitAccept) {
@@ -442,14 +455,15 @@ TEST_F(GLRTest, NoExplicitAccept) {
   // of the nonterminal `test` when the next token is `eof`, verify that the
   // parser stops at the right state.
   const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions);
-  auto LRTable = LRTable::buildSLR(G);
+  TestLang.Table = LRTable::buildSLR(TestLang.G);
 
   const ForestNode &Parsed =
-      glrParse(Tokens, {G, LRTable, Arena, GSStack}, id("test"));
-  EXPECT_EQ(Parsed.dumpRecursive(G), "[  0, end) test := IDENTIFIER test\n"
-                                     "[  0,   1) ├─IDENTIFIER := tok[0]\n"
-                                     "[  1, end) └─test := IDENTIFIER\n"
-                                     "[  1, end)   └─IDENTIFIER := tok[1]\n");
+      glrParse(Tokens, {TestLang.G, TestLang.Table, Arena, GSStack}, id("test"));
+  EXPECT_EQ(Parsed.dumpRecursive(TestLang.G),
+            "[  0, end) test := IDENTIFIER test\n"
+            "[  0,   1) ├─IDENTIFIER := tok[0]\n"
+            "[  1, end) └─test := IDENTIFIER\n"
+            "[  1, end)   └─IDENTIFIER := tok[1]\n");
 }
 
 TEST(GSSTest, GC) {


        


More information about the cfe-commits mailing list