[clang-tools-extra] 7aff663 - [pseudo] Store reduction sequences by pointer in heaps, instead of by value.
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 23 10:41:30 PDT 2022
Author: Sam McCall
Date: 2022-06-23T19:41:11+02:00
New Revision: 7aff663b2a04f6bea0732ed3b11e10466affb2ac
URL: https://github.com/llvm/llvm-project/commit/7aff663b2a04f6bea0732ed3b11e10466affb2ac
DIFF: https://github.com/llvm/llvm-project/commit/7aff663b2a04f6bea0732ed3b11e10466affb2ac.diff
LOG: [pseudo] Store reduction sequences by pointer in heaps, instead of by value.
Copying sequences around as the heap resized is significantly expensive.
This speeds up glrParse by ~35% (2.4 => 3.25 MB/s)
Differential Revision: https://reviews.llvm.org/D128307
Added:
Modified:
clang-tools-extra/pseudo/lib/GLR.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp
index f83b99964561..447878351003 100644
--- a/clang-tools-extra/pseudo/lib/GLR.cpp
+++ b/clang-tools-extra/pseudo/lib/GLR.cpp
@@ -192,13 +192,32 @@ class GLRReduce {
};
// A sequence is the ForestNode payloads of the GSS nodes we are reducing.
- // These are the RHS of the rule, the RuleID is stored in the Family.
- // They specify a sequence ForestNode we may build (but we dedup first).
using Sequence = llvm::SmallVector<const ForestNode *, Rule::MaxElements>;
+ // Like ArrayRef<const ForestNode*>, but with the missing operator<.
+ // (Sequences are big to move by value as the collections gets rearranged).
+ struct SequenceRef {
+ SequenceRef(const Sequence &S) : S(S) {}
+ llvm::ArrayRef<const ForestNode *> S;
+ friend bool operator==(SequenceRef A, SequenceRef B) { return A.S == B.S; }
+ friend bool operator<(const SequenceRef &A, const SequenceRef &B) {
+ return std::lexicographical_compare(A.S.begin(), A.S.end(), B.S.begin(),
+ B.S.end());
+ }
+ };
+ // Underlying storage for sequences pointed to by stored SequenceRefs.
+ std::deque<Sequence> SequenceStorage;
+ // We don't actually destroy the sequences between calls, to reuse storage.
+ // Everything SequenceStorage[ >=SequenceStorageCount ] is reusable scratch.
+ unsigned SequenceStorageCount;
+
+ // Halfway through a reduction (after the pop, before the push), we have
+ // collected nodes for the RHS of a rule, and reached a base node.
+ // They specify a sequence ForestNode we may build (but we dedup first).
+ // (The RuleID is not stored here, but rather in the Family).
struct PushSpec {
// A base node is the head after popping the GSS nodes we are reducing.
const GSS::Node* Base = nullptr;
- Sequence Seq;
+ Sequence *Seq = nullptr;
};
KeyedQueue<Family, PushSpec> Sequences; // FIXME: rename => PendingPushes?
@@ -219,6 +238,7 @@ class GLRReduce {
this->Heads = &Heads;
this->Lookahead = Lookahead;
assert(Sequences.empty());
+ SequenceStorageCount = 0;
popPending();
while (!Sequences.empty()) {
@@ -239,7 +259,14 @@ class GLRReduce {
if (I == Rule.Size) {
F.Start = TempSequence.front()->startTokenIndex();
LLVM_DEBUG(llvm::dbgs() << " --> base at S" << N->State << "\n");
- Sequences.emplace(F, PushSpec{N, TempSequence});
+
+ // Copy the chain to stable storage so it can be enqueued.
+ if (SequenceStorageCount == SequenceStorage.size())
+ SequenceStorage.emplace_back();
+ SequenceStorage[SequenceStorageCount] = TempSequence;
+ Sequence *Seq = &SequenceStorage[SequenceStorageCount++];
+
+ Sequences.emplace(F, PushSpec{N, Seq});
return;
}
TempSequence[Rule.Size - 1 - I] = N->Payload;
@@ -266,7 +293,7 @@ class GLRReduce {
// Storage reused by each call to pushNext.
std::vector<std::pair</*Goto*/ StateID, const GSS::Node *>> FamilyBases;
- std::vector<std::pair<RuleID, Sequence>> FamilySequences;
+ std::vector<std::pair<RuleID, SequenceRef>> FamilySequences;
std::vector<const GSS::Node *> Parents;
std::vector<const ForestNode *> SequenceNodes;
@@ -287,7 +314,7 @@ class GLRReduce {
FamilyBases.clear();
do {
FamilySequences.emplace_back(Sequences.top().first.Rule,
- Sequences.top().second.Seq);
+ *Sequences.top().second.Seq);
FamilyBases.emplace_back(
Params.Table.getGoToState(Sequences.top().second.Base->State,
F.Symbol),
@@ -300,7 +327,7 @@ class GLRReduce {
SequenceNodes.clear();
for (const auto &SequenceSpec : FamilySequences)
SequenceNodes.push_back(&Params.Forest.createSequence(
- F.Symbol, SequenceSpec.first, SequenceSpec.second));
+ F.Symbol, SequenceSpec.first, SequenceSpec.second.S));
// Wrap in an ambiguous node if needed.
const ForestNode *Parsed =
SequenceNodes.size() == 1
More information about the cfe-commits
mailing list