[clang-tools-extra] 93bcff8 - [pseudo] Invert rows/columns of LRTable storage for speedup. NFC
Sam McCall via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 8 14:35:21 PDT 2022
Author: Sam McCall
Date: 2022-06-08T23:35:14+02:00
New Revision: 93bcff8aa85512f2b24e822d857b426c31b6d051
URL: https://github.com/llvm/llvm-project/commit/93bcff8aa85512f2b24e822d857b426c31b6d051
DIFF: https://github.com/llvm/llvm-project/commit/93bcff8aa85512f2b24e822d857b426c31b6d051.diff
LOG: [pseudo] Invert rows/columns of LRTable storage for speedup. NFC
There are more states than symbols.
This means first partitioning the action list by state leaves us with a smaller
range to binary search over. This improves find() a lot and glrParse() by 7%.
The tradeoff is storing more smaller ranges increases the size of the offsets
array, overall grammar memory is +1% (337->340KB).
Before:
glrParse 188795975 ns 188778003 ns 77 bytes_per_second=1.98068M/s
After:
glrParse 175936203 ns 175916873 ns 81 bytes_per_second=2.12548M/s
Differential Revision: https://reviews.llvm.org/D127006
Added:
Modified:
clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
clang-tools-extra/pseudo/tool/ClangPseudo.cpp
Removed:
################################################################################
diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h b/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
index 735ef3b781ad4..3cff9aec8c5eb 100644
--- a/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/LRTable.h
@@ -145,9 +145,8 @@ class LRTable {
size_t bytes() const {
return sizeof(*this) + Actions.capacity() * sizeof(Action) +
- States.capacity() * sizeof(StateID) +
- NontermOffset.capacity() * sizeof(uint32_t) +
- TerminalOffset.capacity() * sizeof(uint32_t);
+ Symbols.capacity() * sizeof(SymbolID) +
+ StateOffset.capacity() * sizeof(uint32_t);
}
std::string dumpStatistics() const;
@@ -170,17 +169,15 @@ class LRTable {
// Conceptually the LR table is a multimap from (State, SymbolID) => Action.
// Our physical representation is quite different for compactness.
- // Index is nonterminal SymbolID, value is the offset into States/Actions
- // where the entries for this nonterminal begin.
- // Give a nonterminal id, the corresponding half-open range of StateIdx is
- // [NontermIdx[id], NontermIdx[id+1]).
- std::vector<uint32_t> NontermOffset;
- // Similar to NontermOffset, but for terminals, index is tok::TokenKind.
- std::vector<uint32_t> TerminalOffset;
- // Parallel to Actions, the value is State (rows of the matrix).
- // Grouped by the SymbolID, and only subranges are sorted.
- std::vector<StateID> States;
- // A flat list of available actions, sorted by (SymbolID, State).
+ // Index is StateID, value is the offset into Symbols/Actions
+ // where the entries for this state begin.
+ // Give a state id, the corresponding half-open range of Symbols/Actions is
+ // [StateOffset[id], StateOffset[id+1]).
+ std::vector<uint32_t> StateOffset;
+ // Parallel to Actions, the value is SymbolID (columns of the matrix).
+ // Grouped by the StateID, and only subranges are sorted.
+ std::vector<SymbolID> Symbols;
+ // A flat list of available actions, sorted by (State, SymbolID).
std::vector<Action> Actions;
// A sorted table, storing the start state for each target parsing symbol.
std::vector<std::pair<SymbolID, StateID>> StartStates;
diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
index 745ad44bafa6c..74e4fb0fedb53 100644
--- a/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/LRTable.cpp
@@ -34,27 +34,20 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) {
}
std::string LRTable::dumpStatistics() const {
- StateID NumOfStates = 0;
- for (StateID It : States)
- NumOfStates = std::max(It, NumOfStates);
return llvm::formatv(R"(
Statistics of the LR parsing table:
number of states: {0}
number of actions: {1}
size of the table (bytes): {2}
)",
- NumOfStates, Actions.size(), bytes())
+ StateOffset.size() - 1, Actions.size(), bytes())
.str();
}
std::string LRTable::dumpForTests(const Grammar &G) const {
std::string Result;
llvm::raw_string_ostream OS(Result);
- StateID MaxState = 0;
- for (StateID It : States)
- MaxState = std::max(MaxState, It);
- OS << "LRTable:\n";
- for (StateID S = 0; S <= MaxState; ++S) {
+ for (StateID S = 0; S < StateOffset.size() - 1; ++S) {
OS << llvm::formatv("State {0}\n", S);
for (uint16_t Terminal = 0; Terminal < NumTerminals; ++Terminal) {
SymbolID TokID = tokenSymbol(static_cast<tok::TokenKind>(Terminal));
@@ -97,26 +90,22 @@ LRTable::StateID LRTable::getGoToState(StateID State,
}
llvm::ArrayRef<LRTable::Action> LRTable::find(StateID Src, SymbolID ID) const {
- size_t Idx = isToken(ID) ? static_cast<size_t>(symbolToToken(ID)) : ID;
- assert(isToken(ID) ? Idx + 1 < TerminalOffset.size()
- : Idx + 1 < NontermOffset.size());
- std::pair<size_t, size_t> TargetStateRange =
- isToken(ID) ? std::make_pair(TerminalOffset[Idx], TerminalOffset[Idx + 1])
- : std::make_pair(NontermOffset[Idx], NontermOffset[Idx + 1]);
- auto TargetedStates =
- llvm::makeArrayRef(States.data() + TargetStateRange.first,
- States.data() + TargetStateRange.second);
+ assert(Src + 1 < StateOffset.size());
+ std::pair<size_t, size_t> Range =
+ std::make_pair(StateOffset[Src], StateOffset[Src + 1]);
+ auto SymbolRange = llvm::makeArrayRef(Symbols.data() + Range.first,
+ Symbols.data() + Range.second);
- assert(llvm::is_sorted(TargetedStates) &&
- "subrange of the StateIdx should be sorted!");
- const LRTable::StateID *Start = llvm::partition_point(
- TargetedStates, [&Src](LRTable::StateID S) { return S < Src; });
- if (Start == TargetedStates.end())
+ assert(llvm::is_sorted(SymbolRange) &&
+ "subrange of the Symbols should be sorted!");
+ const LRTable::StateID *Start =
+ llvm::partition_point(SymbolRange, [&ID](SymbolID S) { return S < ID; });
+ if (Start == SymbolRange.end())
return {};
const LRTable::StateID *End = Start;
- while (End != TargetedStates.end() && *End == Src)
+ while (End != SymbolRange.end() && *End == ID)
++End;
- return llvm::makeArrayRef(&Actions[Start - States.data()],
+ return llvm::makeArrayRef(&Actions[Start - Symbols.data()],
/*length=*/End - Start);
}
diff --git a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
index 827eb986ab226..f59c8736fbec5 100644
--- a/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
+++ b/clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp
@@ -44,7 +44,7 @@ class LRTable::Builder {
: StartStates(StartStates) {}
bool insert(Entry E) { return Entries.insert(std::move(E)).second; }
- LRTable build(const GrammarTable &GT) && {
+ LRTable build(const GrammarTable &GT, unsigned NumStates) && {
// E.g. given the following parsing table with 3 states and 3 terminals:
//
// a b c
@@ -55,44 +55,34 @@ class LRTable::Builder {
// +-------+----+-------+-+
//
// The final LRTable:
- // - TerminalOffset: [a] = 0, [b] = 1, [c] = 4, [d] = 4 (d is a sentinel)
- // - States: [ 1, 0, 0, 2]
- // Actions: [ acc, s0, r0, r1]
- // ~~~ corresponding range for terminal a
- // ~~~~~~~~~~ corresponding range for terminal b
- // First step, we sort all entries by (Symbol, State, Action).
+ // - StateOffset: [s0] = 0, [s1] = 2, [s2] = 3, [sentinel] = 4
+ // - Symbols: [ b, b, a, b]
+ // Actions: [ s0, r0, acc, r1]
+ // ~~~~~~ range for state 0
+ // ~~~~ range for state 1
+ // ~~ range for state 2
+ // First step, we sort all entries by (State, Symbol, Action).
std::vector<Entry> Sorted(Entries.begin(), Entries.end());
llvm::sort(Sorted, [](const Entry &L, const Entry &R) {
- return std::forward_as_tuple(L.Symbol, L.State, L.Act.opaque()) <
- std::forward_as_tuple(R.Symbol, R.State, R.Act.opaque());
+ return std::forward_as_tuple(L.State, L.Symbol, L.Act.opaque()) <
+ std::forward_as_tuple(R.State, R.Symbol, R.Act.opaque());
});
LRTable Table;
Table.Actions.reserve(Sorted.size());
- Table.States.reserve(Sorted.size());
+ Table.Symbols.reserve(Sorted.size());
// We are good to finalize the States and Actions.
for (const auto &E : Sorted) {
Table.Actions.push_back(E.Act);
- Table.States.push_back(E.State);
+ Table.Symbols.push_back(E.Symbol);
}
// Initialize the terminal and nonterminal offset, all ranges are empty by
// default.
- Table.TerminalOffset = std::vector<uint32_t>(GT.Terminals.size() + 1, 0);
- Table.NontermOffset = std::vector<uint32_t>(GT.Nonterminals.size() + 1, 0);
+ Table.StateOffset = std::vector<uint32_t>(NumStates + 1, 0);
size_t SortedIndex = 0;
- for (SymbolID NonterminalID = 0; NonterminalID < Table.NontermOffset.size();
- ++NonterminalID) {
- Table.NontermOffset[NonterminalID] = SortedIndex;
- while (SortedIndex < Sorted.size() &&
- Sorted[SortedIndex].Symbol == NonterminalID)
- ++SortedIndex;
- }
- for (size_t Terminal = 0; Terminal < Table.TerminalOffset.size();
- ++Terminal) {
- Table.TerminalOffset[Terminal] = SortedIndex;
- while (SortedIndex < Sorted.size() &&
- Sorted[SortedIndex].Symbol ==
- tokenSymbol(static_cast<tok::TokenKind>(Terminal)))
+ for (StateID State = 0; State < Table.StateOffset.size(); ++State) {
+ Table.StateOffset[State] = SortedIndex;
+ while (SortedIndex < Sorted.size() && Sorted[SortedIndex].State == State)
++SortedIndex;
}
Table.StartStates = std::move(StartStates);
@@ -106,10 +96,13 @@ class LRTable::Builder {
LRTable LRTable::buildForTests(const GrammarTable &GT,
llvm::ArrayRef<Entry> Entries) {
+ StateID MaxState = 0;
+ for (const auto &Entry : Entries)
+ MaxState = std::max(MaxState, Entry.State);
Builder Build({});
for (const Entry &E : Entries)
Build.insert(E);
- return std::move(Build).build(GT);
+ return std::move(Build).build(GT, /*NumStates=*/MaxState + 1);
}
LRTable LRTable::buildSLR(const Grammar &G) {
@@ -139,7 +132,7 @@ LRTable LRTable::buildSLR(const Grammar &G) {
}
}
}
- return std::move(Build).build(G.table());
+ return std::move(Build).build(G.table(), Graph.states().size());
}
} // namespace pseudo
diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
index 1d3ab19b3c09d..cbb45504f40c8 100644
--- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
+++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp
@@ -111,6 +111,8 @@ int main(int argc, char *argv[]) {
auto LRTable = clang::pseudo::LRTable::buildSLR(*G);
if (PrintTable)
llvm::outs() << LRTable.dumpForTests(*G);
+ if (PrintStatistics)
+ llvm::outs() << LRTable.dumpStatistics();
if (ParseableStream) {
clang::pseudo::ForestArena Arena;
More information about the cfe-commits
mailing list