[clang-tools-extra] 74e4d5f - [pseudo] Simplify the glrReduce implementation.

Haojian Wu via cfe-commits cfe-commits at lists.llvm.org
Thu Jun 9 02:29:07 PDT 2022


Author: Haojian Wu
Date: 2022-06-09T11:28:52+02:00
New Revision: 74e4d5f256fab1a9a72ef60c1d672441696c52e9

URL: https://github.com/llvm/llvm-project/commit/74e4d5f256fab1a9a72ef60c1d672441696c52e9
DIFF: https://github.com/llvm/llvm-project/commit/74e4d5f256fab1a9a72ef60c1d672441696c52e9.diff

LOG: [pseudo] Simplify the glrReduce implementation.

glrReduce maintains two priority queues (one for bases, and the other
for Sequence), these queues are in parallel with each other, corresponding to a
single family. They can be folded into one.

This patch removes the bases priority queue, which improves the glrParse by
10%.

ASTReader.cpp: 2.03M/s (before) vs 2.26M/s (after)

Differential Revision: https://reviews.llvm.org/D127283

Added: 
    

Modified: 
    clang-tools-extra/pseudo/lib/GLR.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp
index 0938d8846fe0..0a81556ef052 100644
--- a/clang-tools-extra/pseudo/lib/GLR.cpp
+++ b/clang-tools-extra/pseudo/lib/GLR.cpp
@@ -279,18 +279,16 @@ void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params,
     }
   };
 
-  // The base nodes are the heads after popping the GSS nodes we are reducing.
-  // We don't care which rule yielded each base. If Family.Symbol is S, the
-  // base includes an item X := ... • S ... and since the grammar is
-  // context-free, *all* parses of S are valid here.
-  // FIXME: reuse the queues across calls instead of reallocating.
-  KeyedQueue<Family, const GSS::Node *> Bases;
-
   // A sequence is the ForestNode payloads of the GSS nodes we are reducing.
   // These are the RHS of the rule, the RuleID is stored in the Family.
   // They specify a sequence ForestNode we may build (but we dedup first).
   using Sequence = llvm::SmallVector<const ForestNode *, Rule::MaxElements>;
-  KeyedQueue<Family, Sequence> Sequences;
+  struct PushSpec {
+    // A base node is the head after popping the GSS nodes we are reducing.
+    const GSS::Node* Base = nullptr;
+    Sequence Seq;
+  };
+  KeyedQueue<Family, PushSpec> Sequences;
 
   Sequence TempSequence;
   // Pop walks up the parent chain(s) for a reduction from Head by to Rule.
@@ -303,9 +301,8 @@ void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params,
     auto DFS = [&](const GSS::Node *N, unsigned I, auto &DFS) {
       if (I == Rule.Size) {
         F.Start = TempSequence.front()->startTokenIndex();
-        Bases.emplace(F, N);
         LLVM_DEBUG(llvm::dbgs() << "    --> base at S" << N->State << "\n");
-        Sequences.emplace(F, TempSequence);
+        Sequences.emplace(F, PushSpec{N, TempSequence});
         return;
       }
       TempSequence[Rule.Size - 1 - I] = N->Payload;
@@ -331,20 +328,26 @@ void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params,
   //  - process one family at a time, forming a forest node
   //  - produces new GSS heads which may enable more pops
   PopPending();
-  while (!Bases.empty()) {
-    // We should always have bases and sequences for the same families.
-    Family F = Bases.top().first;
-    assert(!Sequences.empty());
-    assert(Sequences.top().first == F);
+  while (!Sequences.empty()) {
+    Family F = Sequences.top().first;
 
     LLVM_DEBUG(llvm::dbgs() << "  Push " << Params.G.symbolName(F.Symbol)
                             << " from token " << F.Start << "\n");
 
-    // Grab the sequences for this family.
+    // Grab the sequences and bases for this family.
+    // We don't care which rule yielded each base. If Family.Symbol is S, the
+    // base includes an item X := ... • S ... and since the grammar is
+    // context-free, *all* parses of S are valid here.
     FamilySequences.clear();
+    FamilyBases.clear();
     do {
       FamilySequences.emplace_back(Sequences.top().first.Rule,
-                                   Sequences.top().second);
+                                   Sequences.top().second.Seq);
+      FamilyBases.emplace_back(
+          Params.Table.getGoToState(Sequences.top().second.Base->State,
+                                    F.Symbol),
+          Sequences.top().second.Base);
+
       Sequences.pop();
     } while (!Sequences.empty() && Sequences.top().first == F);
     // Build a forest node for each unique sequence.
@@ -361,15 +364,7 @@ void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params,
             : &Params.Forest.createAmbiguous(F.Symbol, SequenceNodes);
     LLVM_DEBUG(llvm::dbgs() << "    --> " << Parsed->dump(Params.G) << "\n");
 
-    // Grab the bases for this family.
-    // As well as deduplicating them, we'll group by the goto state.
-    FamilyBases.clear();
-    do {
-      FamilyBases.emplace_back(
-          Params.Table.getGoToState(Bases.top().second->State, F.Symbol),
-          Bases.top().second);
-      Bases.pop();
-    } while (!Bases.empty() && Bases.top().first == F);
+    // Bases for this family, deduplicate them, and group by the goTo State.
     sortAndUnique(FamilyBases);
     // Create a GSS node for each unique goto state.
     llvm::ArrayRef<decltype(FamilyBases)::value_type> BasesLeft = FamilyBases;


        


More information about the cfe-commits mailing list