[PATCH] D128307: [pseudo] Store reduction sequences by pointer in heaps, instead of by value.

Sam McCall via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Tue Jun 21 15:20:49 PDT 2022


sammccall created this revision.
sammccall added a reviewer: hokein.
Herald added a project: All.
sammccall requested review of this revision.
Herald added subscribers: cfe-commits, alextsao1999.
Herald added a project: clang-tools-extra.

Copying sequences around whenever the heap is resized is expensive.

This speeds up glrParse by ~35% (2.4 => 3.25 MB/s)


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D128307

Files:
  clang-tools-extra/pseudo/lib/GLR.cpp


Index: clang-tools-extra/pseudo/lib/GLR.cpp
===================================================================
--- clang-tools-extra/pseudo/lib/GLR.cpp
+++ clang-tools-extra/pseudo/lib/GLR.cpp
@@ -192,13 +192,32 @@
   };
 
   // A sequence is the ForestNode payloads of the GSS nodes we are reducing.
-  // These are the RHS of the rule, the RuleID is stored in the Family.
-  // They specify a sequence ForestNode we may build (but we dedup first).
   using Sequence = llvm::SmallVector<const ForestNode *, Rule::MaxElements>;
+  // Like ArrayRef<const ForestNode*>, but with the missing operator<.
+  // (Sequences are big to move by value as the collection gets rearranged).
+  struct SequenceRef {
+    SequenceRef(const Sequence &S) : S(S) {}
+    llvm::ArrayRef<const ForestNode *> S;
+    friend bool operator==(SequenceRef A, SequenceRef B) { return A.S == B.S; }
+    friend bool operator<(const SequenceRef &A, const SequenceRef &B) {
+      return std::lexicographical_compare(A.S.begin(), A.S.end(), B.S.begin(),
+                                          B.S.end());
+    }
+  };
+  // Underlying storage for sequences pointed to by stored SequenceRefs.
+  std::deque<Sequence> SequenceStorage;
+  // We don't actually destroy the sequences between calls, to reuse storage.
+  // Elements at index >= SequenceStorageCount are reusable scratch.
+  unsigned SequenceStorageCount;
+
+  // Halfway through a reduction (after the pop, before the push), we have
+  // collected nodes for the RHS of a rule, and reached a base node.
+  // They specify a sequence ForestNode we may build (but we dedup first).
+  // (The RuleID is not stored here, but rather in the Family).
   struct PushSpec {
     // A base node is the head after popping the GSS nodes we are reducing.
     const GSS::Node* Base = nullptr;
-    Sequence Seq;
+    Sequence *Seq;
   };
   KeyedQueue<Family, PushSpec> Sequences; // FIXME: rename => PendingPushes?
 
@@ -220,6 +239,7 @@
     this->Heads = &Heads;
     this->Lookahead = Lookahead;
     assert(Sequences.empty());
+    SequenceStorageCount = 0;
 
     popPending();
     while (!Sequences.empty()) {
@@ -240,7 +260,14 @@
       if (I == Rule.Size) {
         F.Start = TempSequence.front()->startTokenIndex();
         LLVM_DEBUG(llvm::dbgs() << "    --> base at S" << N->State << "\n");
-        Sequences.emplace(F, PushSpec{N, TempSequence});
+
+        // Copy the chain to stable storage so it can be enqueued.
+        if (SequenceStorageCount == SequenceStorage.size())
+          SequenceStorage.emplace_back();
+        SequenceStorage[SequenceStorageCount] = TempSequence;
+        Sequence *Seq = &SequenceStorage[SequenceStorageCount++];
+
+        Sequences.emplace(F, PushSpec{N, Seq});
         return;
       }
       TempSequence[Rule.Size - 1 - I] = N->Payload;
@@ -266,7 +293,7 @@
 
   // Storage reused by each call to pushNext.
   std::vector<std::pair</*Goto*/ StateID, const GSS::Node *>> FamilyBases;
-  std::vector<std::pair<RuleID, Sequence>> FamilySequences;
+  std::vector<std::pair<RuleID, SequenceRef>> FamilySequences;
   std::vector<const GSS::Node *> Parents;
   std::vector<const ForestNode *> SequenceNodes;
 
@@ -287,7 +314,7 @@
     FamilyBases.clear();
     do {
       FamilySequences.emplace_back(Sequences.top().first.Rule,
-                                   Sequences.top().second.Seq);
+                                   *Sequences.top().second.Seq); // XXX
       FamilyBases.emplace_back(
           Params.Table.getGoToState(Sequences.top().second.Base->State,
                                     F.Symbol),
@@ -300,7 +327,7 @@
     SequenceNodes.clear();
     for (const auto &SequenceSpec : FamilySequences)
       SequenceNodes.push_back(&Params.Forest.createSequence(
-          F.Symbol, SequenceSpec.first, SequenceSpec.second));
+          F.Symbol, SequenceSpec.first, SequenceSpec.second.S));
     // Wrap in an ambiguous node if needed.
     const ForestNode *Parsed =
         SequenceNodes.size() == 1


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D128307.438851.patch
Type: text/x-patch
Size: 4015 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20220621/edc11e1d/attachment.bin>


More information about the cfe-commits mailing list