[clang-tools-extra] 1a65c49 - [pseudo] Support parsing variant target symbols.

Haojian Wu via cfe-commits cfe-commits at lists.llvm.org
Mon May 16 01:38:25 PDT 2022


Author: Haojian Wu
Date: 2022-05-16T10:38:16+02:00
New Revision: 1a65c491be712f89b36af9d828b6ccce02de37db

URL: https://github.com/llvm/llvm-project/commit/1a65c491be712f89b36af9d828b6ccce02de37db
DIFF: https://github.com/llvm/llvm-project/commit/1a65c491be712f89b36af9d828b6ccce02de37db.diff

LOG: [pseudo] Support parsing variant target symbols.

With this patch, we're able to parse smaller chunks of C++ code (statement,
declaration), rather than translation-unit.

The start symbol is listed in the grammar in a form of `_ :=
statement`, each start symbol has a dedicated state (`_ := • statement`).
We create and track all these separate states in the LRTable. When we
start parsing, we lookup the corresponding state to start the parser.

LR pasing table changes with this patch:
- number of states: 1467 -> 1471
- number of actions: 82891 -> 83578
- size of the table (bytes): 334248 -> 336996

Differential Revision: https://reviews.llvm.org/D125006

Added: 
    clang-tools-extra/pseudo/test/glr-variant-start.cpp

Modified: 
    clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
    clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
    clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
    clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
    clang-tools-extra/pseudo/include/clang-pseudo/LRGraph.h
    clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
    clang-tools-extra/pseudo/lib/GLR.cpp
    clang-tools-extra/pseudo/lib/Grammar.cpp
    clang-tools-extra/pseudo/lib/LRGraph.cpp
    clang-tools-extra/pseudo/lib/LRTable.cpp
    clang-tools-extra/pseudo/lib/LRTableBuild.cpp
    clang-tools-extra/pseudo/lib/cxx.bnf
    clang-tools-extra/pseudo/tool/ClangPseudo.cpp
    clang-tools-extra/pseudo/unittests/GLRTest.cpp
    clang-tools-extra/pseudo/unittests/GrammarTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
index 9568f76774af2..5518a51166593 100644
--- a/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
+++ b/clang-tools-extra/pseudo/benchmarks/Benchmark.cpp
@@ -103,10 +103,11 @@ static void runGLRParse(benchmark::State &State) {
   clang::LangOptions LangOpts = genericLangOpts();
   LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
   TokenStream ParseableStream = parseableTokenStream();
+  SymbolID StartSymbol = *G->findNonterminal("translation-unit");
   for (auto _ : State) {
     pseudo::ForestArena Forest;
     pseudo::GSS GSS;
-    glrParse(ParseableStream, ParseParams{*G, Table, Forest, GSS});
+    glrParse(ParseableStream, ParseParams{*G, Table, Forest, GSS}, StartSymbol);
   }
   State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
                           SourceText->size());
@@ -116,10 +117,12 @@ BENCHMARK(runGLRParse);
 static void runParseOverall(benchmark::State &State) {
   clang::LangOptions LangOpts = genericLangOpts();
   LRTable Table = clang::pseudo::LRTable::buildSLR(*G);
+  SymbolID StartSymbol = *G->findNonterminal("translation-unit");
   for (auto _ : State) {
     pseudo::ForestArena Forest;
     pseudo::GSS GSS;
-    glrParse(parseableTokenStream(), ParseParams{*G, Table, Forest, GSS});
+    glrParse(parseableTokenStream(), ParseParams{*G, Table, Forest, GSS},
+             StartSymbol);
   }
   State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
                           SourceText->size());

diff  --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
index 4907fc9f9c049..7b59eec30dbdf 100644
--- a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
+++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
@@ -58,8 +58,9 @@ class Fuzzer {
 
     clang::pseudo::ForestArena Arena;
     clang::pseudo::GSS GSS;
-    auto &Root = glrParse(ParseableStream,
-                          clang::pseudo::ParseParams{*G, T, Arena, GSS});
+    auto &Root =
+        glrParse(ParseableStream, clang::pseudo::ParseParams{*G, T, Arena, GSS},
+                 *G->findNonterminal("translation-unit"));
     if (Print)
       llvm::outs() << Root.dumpRecursive(*G);
   }

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
index f4eb424dd53fa..70d6e46e5aed3 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/GLR.h
@@ -122,13 +122,14 @@ struct ParseParams {
   ForestArena &Forest;  // Storage for the output forest.
   GSS &GSStack;         // Storage for parsing stacks.
 };
-// Parse the given token stream with the GLR algorithm, and return a forest node
-// of the start symbol.
+// Parses the given token stream as the start symbol with the GLR algorithm,
+// and returns a forest node of the start symbol.
 //
-// If the parsing fails, we model it as an opaque node in the forest.
+// A rule `_ := StartSymbol` must exit for the chosen start symbol.
 //
-// FIXME: add support for variant start symbols.
-const ForestNode &glrParse(const TokenStream &Code, const ParseParams &Params);
+// If the parsing fails, we model it as an opaque node in the forest.
+const ForestNode &glrParse(const TokenStream &Code, const ParseParams &Params,
+                           SymbolID StartSymbol);
 
 // An active stack head can have multiple available actions (reduce/reduce
 // actions, reduce/shift actions).

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
index 46e8fcbcafa2c..df757c61a1ede 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Grammar.h
@@ -39,6 +39,7 @@
 #include "clang/Basic/TokenKinds.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include <cstdint>
 #include <vector>
@@ -117,8 +118,8 @@ class Grammar {
   static std::unique_ptr<Grammar> parseBNF(llvm::StringRef BNF,
                                            std::vector<std::string> &Diags);
 
-  // Returns the SymbolID of the start symbol '_'.
-  SymbolID startSymbol() const { return StartSymbol; };
+  // Returns the SymbolID of the symbol '_'.
+  SymbolID underscore() const { return Underscore; };
 
   // Returns all rules of the given nonterminal symbol.
   llvm::ArrayRef<Rule> rulesFor(SymbolID SID) const;
@@ -128,6 +129,9 @@ class Grammar {
   // Terminals have names like "," (kw_comma) or "OPERATOR" (kw_operator).
   llvm::StringRef symbolName(SymbolID) const;
 
+  // Lookup the SymbolID of the nonterminal symbol by Name.
+  llvm::Optional<SymbolID> findNonterminal(llvm::StringRef Name) const;
+
   // Dumps the whole grammar.
   std::string dump() const;
   // Dumps a particular rule.
@@ -139,8 +143,9 @@ class Grammar {
 
 private:
   std::unique_ptr<GrammarTable> T;
-  // The start symbol '_' of the augmented grammar.
-  SymbolID StartSymbol;
+  // The symbol ID of '_'. (In the LR literature, this is the start symbol of
+  // the augmented grammar.)
+  SymbolID Underscore;
 };
 // For each nonterminal X, computes the set of terminals that begin strings
 // derived from X. (Known as FIRST sets in grammar-based parsers).

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/LRGraph.h b/clang-tools-extra/pseudo/include/clang-pseudo/LRGraph.h
index c357c1ef995f0..889c5e5decf50 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/LRGraph.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/LRGraph.h
@@ -139,15 +139,21 @@ class LRGraph {
 
   llvm::ArrayRef<State> states() const { return States; }
   llvm::ArrayRef<Edge> edges() const { return Edges; }
+  llvm::ArrayRef<std::pair<SymbolID, StateID>> startStates() const {
+    return StartStates;
+  }
 
   std::string dumpForTests(const Grammar &) const;
 
 private:
-  LRGraph(std::vector<State> States, std::vector<Edge> Edges)
-      : States(std::move(States)), Edges(std::move(Edges)) {}
+  LRGraph(std::vector<State> States, std::vector<Edge> Edges,
+          std::vector<std::pair<SymbolID, StateID>> StartStates)
+      : States(std::move(States)), Edges(std::move(Edges)),
+        StartStates(std::move(StartStates)) {}
 
   std::vector<State> States;
   std::vector<Edge> Edges;
+  std::vector<std::pair<SymbolID, StateID>> StartStates;
 };
 
 } // namespace pseudo

diff  --git a/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h b/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
index d41fca0d4b5d2..735ef3b781ad4 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
@@ -132,6 +132,17 @@ class LRTable {
   // Returns empty if no available actions in the table.
   llvm::ArrayRef<Action> find(StateID State, SymbolID Symbol) const;
 
+  // Returns the state from which the LR parser should start to parse the input
+  // tokens as the given StartSymbol.
+  //
+  // In LR parsing, the start state of `translation-unit` corresponds to
+  // `_ := • translation-unit`.
+  //
+  // Each start state responds to **a** single grammar rule like `_ := start`.
+  // REQUIRE: The given StartSymbol must exist in the grammar (in a form of
+  //          `_ := start`).
+  StateID getStartState(SymbolID StartSymbol) const;
+
   size_t bytes() const {
     return sizeof(*this) + Actions.capacity() * sizeof(Action) +
            States.capacity() * sizeof(StateID) +
@@ -171,6 +182,8 @@ class LRTable {
   std::vector<StateID> States;
   // A flat list of available actions, sorted by (SymbolID, State).
   std::vector<Action> Actions;
+  // A sorted table, storing the start state for each target parsing symbol.
+  std::vector<std::pair<SymbolID, StateID>> StartStates;
 };
 llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &);
 

diff  --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp
index 771d69fb680dc..cf0a370820e16 100644
--- a/clang-tools-extra/pseudo/lib/GLR.cpp
+++ b/clang-tools-extra/pseudo/lib/GLR.cpp
@@ -36,8 +36,8 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const GSS::Node &N) {
   return OS;
 }
 
-const ForestNode &glrParse(const TokenStream &Tokens,
-                           const ParseParams &Params) {
+const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
+                           SymbolID StartSymbol) {
   llvm::ArrayRef<ForestNode> Terminals = Params.Forest.createTerminals(Tokens);
   auto &G = Params.G;
   auto &GSS = Params.GSStack;
@@ -61,9 +61,9 @@ const ForestNode &glrParse(const TokenStream &Tokens,
       }
     }
   };
-
   std::vector<const GSS::Node *> NewHeads = {
-      GSS.addNode(/*State=*/0, /*ForestNode*/ nullptr, {})};
+      GSS.addNode(/*State=*/Params.Table.getStartState(StartSymbol),
+                  /*ForestNode=*/nullptr, {})};
   for (const ForestNode &Terminal : Terminals) {
     LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next token {0} (id={1})\n",
                                              G.symbolName(Terminal.symbol()),
@@ -101,11 +101,7 @@ const ForestNode &glrParse(const TokenStream &Tokens,
     return *PendingAccept.front().Head->Payload;
   }
   // We failed to parse the input, returning an opaque forest node for recovery.
-  auto RulesForStart = G.rulesFor(G.startSymbol());
-  // FIXME: support multiple start symbols.
-  assert(RulesForStart.size() == 1 && RulesForStart.front().Size == 1 &&
-         "start symbol _ must have exactly one rule");
-  return Params.Forest.createOpaque(RulesForStart.front().Sequence[0], 0);
+  return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0);
 }
 
 // Apply all pending shift actions.

diff  --git a/clang-tools-extra/pseudo/lib/Grammar.cpp b/clang-tools-extra/pseudo/lib/Grammar.cpp
index 563390430f524..e048f8753d9d9 100644
--- a/clang-tools-extra/pseudo/lib/Grammar.cpp
+++ b/clang-tools-extra/pseudo/lib/Grammar.cpp
@@ -24,13 +24,7 @@ Rule::Rule(SymbolID Target, llvm::ArrayRef<SymbolID> Sequence)
 }
 
 Grammar::Grammar(std::unique_ptr<GrammarTable> Table) : T(std::move(Table)) {
-  // start symbol is named _, binary search it.
-  auto It = llvm::partition_point(
-      T->Nonterminals,
-      [](const GrammarTable::Nonterminal &X) { return X.Name < "_"; });
-  assert(It != T->Nonterminals.end() && It->Name == "_" &&
-         "symbol _ must exist in the grammar!");
-  StartSymbol = It - T->Nonterminals.begin();
+  Underscore = *findNonterminal("_");
 }
 
 llvm::ArrayRef<Rule> Grammar::rulesFor(SymbolID SID) const {
@@ -51,6 +45,15 @@ llvm::StringRef Grammar::symbolName(SymbolID SID) const {
   return T->Nonterminals[SID].Name;
 }
 
+llvm::Optional<SymbolID> Grammar::findNonterminal(llvm::StringRef Name) const {
+  auto It = llvm::partition_point(
+      T->Nonterminals,
+      [&](const GrammarTable::Nonterminal &X) { return X.Name < Name; });
+  if (It != T->Nonterminals.end() && It->Name == Name)
+    return It - T->Nonterminals.begin();
+  return llvm::None;
+}
+
 std::string Grammar::dumpRule(RuleID RID) const {
   std::string Result;
   llvm::raw_string_ostream OS(Result);
@@ -132,8 +135,9 @@ std::vector<llvm::DenseSet<SymbolID>> followSets(const Grammar &G) {
   // is completed at a fixed point where there is no more new symbols can be
   // added to any of the follow sets.
   //
-  // Rule 1: add endmarker to the FOLLOW(S), where S is the start symbol.
-  FollowSets[G.startSymbol()].insert(tokenSymbol(tok::eof));
+  // Rule 1: add endmarker to the FOLLOW(S), where S is the start symbol of the
+  // augmented grammar, in our case it is '_'.
+  FollowSets[G.underscore()].insert(tokenSymbol(tok::eof));
   bool Changed = true;
   while (Changed) {
     Changed = false;

diff  --git a/clang-tools-extra/pseudo/lib/LRGraph.cpp b/clang-tools-extra/pseudo/lib/LRGraph.cpp
index 9dc0cdbbdab81..a186549a25e9d 100644
--- a/clang-tools-extra/pseudo/lib/LRGraph.cpp
+++ b/clang-tools-extra/pseudo/lib/LRGraph.cpp
@@ -187,10 +187,17 @@ LRGraph LRGraph::buildLR0(const Grammar &G) {
       return States[ID];
     }
 
+    void addStartState(SymbolID Sym, StateID State) {
+      StartStates.push_back({Sym, State});
+    }
+
     LRGraph build() && {
       States.shrink_to_fit();
       Edges.shrink_to_fit();
-      return LRGraph(std::move(States), std::move(Edges));
+      llvm::sort(StartStates);
+      StartStates.shrink_to_fit();
+      return LRGraph(std::move(States), std::move(Edges),
+                     std::move(StartStates));
     }
 
   private:
@@ -199,16 +206,22 @@ LRGraph LRGraph::buildLR0(const Grammar &G) {
     std::vector<State> States;
     std::vector<Edge> Edges;
     const Grammar &G;
+    std::vector<std::pair<SymbolID, StateID>> StartStates;
   } Builder(G);
 
   std::vector<StateID> PendingStates;
   // Initialize states with the start symbol.
-  auto RRange = G.table().Nonterminals[G.startSymbol()].RuleRange;
+  auto RRange = G.table().Nonterminals[G.underscore()].RuleRange;
   for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) {
     auto StartState = std::vector<Item>{Item::start(RID, G)};
     auto Result = Builder.insert(std::move(StartState));
     assert(Result.second && "State must be new");
     PendingStates.push_back(Result.first);
+
+    const Rule &StartRule = G.lookupRule(RID);
+    assert(StartRule.Size == 1 &&
+           "Start rule must have exactly one symbol in its body!");
+    Builder.addStartState(StartRule.seq().front(), Result.first);
   }
 
   while (!PendingStates.empty()) {

diff  --git a/clang-tools-extra/pseudo/lib/LRTable.cpp b/clang-tools-extra/pseudo/lib/LRTable.cpp
index b9ff7e4aca6a9..34e7fce4f55dc 100644
--- a/clang-tools-extra/pseudo/lib/LRTable.cpp
+++ b/clang-tools-extra/pseudo/lib/LRTable.cpp
@@ -120,5 +120,16 @@ llvm::ArrayRef<LRTable::Action> LRTable::find(StateID Src, SymbolID ID) const {
                             /*length=*/End - Start);
 }
 
+LRTable::StateID LRTable::getStartState(SymbolID Target) const {
+  assert(llvm::is_sorted(StartStates) && "StartStates must be sorted!");
+  auto It = llvm::partition_point(
+      StartStates, [Target](const std::pair<SymbolID, StateID> &X) {
+        return X.first < Target;
+      });
+  assert(It != StartStates.end() && It->first == Target &&
+         "target symbol doesn't have a start state!");
+  return It->second;
+}
+
 } // namespace pseudo
 } // namespace clang

diff  --git a/clang-tools-extra/pseudo/lib/LRTableBuild.cpp b/clang-tools-extra/pseudo/lib/LRTableBuild.cpp
index cb4ea698c4bb8..827eb986ab226 100644
--- a/clang-tools-extra/pseudo/lib/LRTableBuild.cpp
+++ b/clang-tools-extra/pseudo/lib/LRTableBuild.cpp
@@ -40,6 +40,9 @@ namespace pseudo {
 
 class LRTable::Builder {
 public:
+  Builder(llvm::ArrayRef<std::pair<SymbolID, StateID>> StartStates)
+      : StartStates(StartStates) {}
+
   bool insert(Entry E) { return Entries.insert(std::move(E)).second; }
   LRTable build(const GrammarTable &GT) && {
     // E.g. given the following parsing table with 3 states and 3 terminals:
@@ -92,24 +95,26 @@ class LRTable::Builder {
                  tokenSymbol(static_cast<tok::TokenKind>(Terminal)))
         ++SortedIndex;
     }
+    Table.StartStates = std::move(StartStates);
     return Table;
   }
 
 private:
   llvm::DenseSet<Entry> Entries;
+  std::vector<std::pair<SymbolID, StateID>> StartStates;
 };
 
 LRTable LRTable::buildForTests(const GrammarTable &GT,
                                llvm::ArrayRef<Entry> Entries) {
-  Builder Build;
+  Builder Build({});
   for (const Entry &E : Entries)
     Build.insert(E);
   return std::move(Build).build(GT);
 }
 
 LRTable LRTable::buildSLR(const Grammar &G) {
-  Builder Build;
   auto Graph = LRGraph::buildLR0(G);
+  Builder Build(Graph.startStates());
   for (const auto &T : Graph.edges()) {
     Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);
     Build.insert({T.Src, T.Label, Act});
@@ -120,7 +125,7 @@ LRTable LRTable::buildSLR(const Grammar &G) {
   for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
     for (const Item &I : Graph.states()[SID].Items) {
       // If we've just parsed the start symbol, we can accept the input.
-      if (G.lookupRule(I.rule()).Target == G.startSymbol() && !I.hasNext()) {
+      if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext()) {
         Build.insert({SID, tokenSymbol(tok::eof), Action::accept(I.rule())});
         continue;
       }

diff  --git a/clang-tools-extra/pseudo/lib/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx.bnf
index cf664b8e13e55..c740f742436dc 100644
--- a/clang-tools-extra/pseudo/lib/cxx.bnf
+++ b/clang-tools-extra/pseudo/lib/cxx.bnf
@@ -23,10 +23,15 @@
 #   - optional symbols are supported, with a _opt suffix;
 #
 # [1] https://isocpp.org/files/papers/N4860.pdf
+
+# _ lists all the start-symbols which we support parsing.
 #
-#
-# _ serves as a "fake" start symbol, coming with real grammar symbols.
+# We list important nonterminals as start symbols, rather than doing it for all
+# nonterminals by default, this reduces the number of states by 30% and LRTable
+# actions by 16%.
 _ := translation-unit
+_ := statement-seq
+_ := declaration-seq
 
 # gram.key
 typedef-name := IDENTIFIER

diff  --git a/clang-tools-extra/pseudo/test/glr-variant-start.cpp b/clang-tools-extra/pseudo/test/glr-variant-start.cpp
new file mode 100644
index 0000000000000..1bd073707353b
--- /dev/null
+++ b/clang-tools-extra/pseudo/test/glr-variant-start.cpp
@@ -0,0 +1,9 @@
+// RUN: clang-pseudo -grammar=%cxx-bnf-file -source=%s --start-symbol=statement-seq --print-forest | FileCheck %s
+
+a + a;
+// CHECK:      statement-seq~expression-statement := expression ;
+// CHECK-NEXT: ├─expression~additive-expression := additive-expression + multiplicative-expression
+// CHECK-NEXT: │ ├─additive-expression~IDENTIFIER :=
+// CHECK-NEXT: │ ├─+ :=
+// CHECK-NEXT: │ └─multiplicative-expression~IDENTIFIER :=
+// CHECK-NEXT: └─; :=

diff  --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
index 47a0b2a10ae7e..5a6956df1f708 100644
--- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -43,6 +43,9 @@ static opt<bool>
                     desc("Strip directives and select conditional sections"));
 static opt<bool> PrintStatistics("print-statistics", desc("Print GLR parser statistics"));
 static opt<bool> PrintForest("print-forest", desc("Print parse forest"));
+static opt<std::string> StartSymbol("start-symbol",
+                                    desc("specify the start symbol to parse"),
+                                    init("translation-unit"));
 
 static std::string readOrDie(llvm::StringRef Path) {
   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
@@ -110,9 +113,16 @@ int main(int argc, char *argv[]) {
     if (ParseableStream) {
       clang::pseudo::ForestArena Arena;
       clang::pseudo::GSS GSS;
-      auto &Root =
-          glrParse(*ParseableStream,
-                   clang::pseudo::ParseParams{*G, LRTable, Arena, GSS});
+      llvm::Optional<clang::pseudo::SymbolID> StartSymID =
+          G->findNonterminal(StartSymbol);
+      if (!StartSymID) {
+        llvm::errs() << llvm::formatv(
+            "The start symbol {0} doesn't exit in the grammar!\n", Grammar);
+        return 2;
+      }
+      auto &Root = glrParse(*ParseableStream,
+                            clang::pseudo::ParseParams{*G, LRTable, Arena, GSS},
+                            *StartSymID);
       if (PrintForest)
         llvm::outs() << Root.dumpRecursive(*G, /*Abbreviated=*/true);
 

diff  --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
index 1998c27dd90a5..f885094111a6c 100644
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -344,7 +344,8 @@ TEST_F(GLRTest, PerfectForestNodeSharing) {
   const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions);
   auto LRTable = LRTable::buildSLR(*G);
 
-  const ForestNode &Parsed = glrParse(Tokens, {*G, LRTable, Arena, GSStack});
+  const ForestNode &Parsed =
+      glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
   // Verify that there is no duplicated sequence node of `expr := IDENTIFIER`
   // in the forest, see the `#1` and `=#1` in the dump string.
   EXPECT_EQ(Parsed.dumpRecursive(*G),
@@ -381,7 +382,8 @@ TEST_F(GLRTest, GLRReduceOrder) {
   const TokenStream &Tokens = cook(lex("IDENTIFIER", LOptions), LOptions);
   auto LRTable = LRTable::buildSLR(*G);
 
-  const ForestNode &Parsed = glrParse(Tokens, {*G, LRTable, Arena, GSStack});
+  const ForestNode &Parsed =
+      glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
   EXPECT_EQ(Parsed.dumpRecursive(*G),
             "[  0, end) test := <ambiguous>\n"
             "[  0, end) ├─test := IDENTIFIER\n"

diff  --git a/clang-tools-extra/pseudo/unittests/GrammarTest.cpp b/clang-tools-extra/pseudo/unittests/GrammarTest.cpp
index 19e9481b7ed15..0f71c47019058 100644
--- a/clang-tools-extra/pseudo/unittests/GrammarTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GrammarTest.cpp
@@ -112,7 +112,7 @@ TEST_F(GrammarTest, Diagnostics) {
     b := a
   )cpp");
 
-  EXPECT_EQ(G->startSymbol(), id("_"));
+  EXPECT_EQ(G->underscore(), id("_"));
   EXPECT_THAT(Diags, UnorderedElementsAre(
                          "Rule '_ := ,_opt' has a nullable RHS",
                          "Rule 'null := ' has a nullable RHS",


        


More information about the cfe-commits mailing list