[llvm] 8faffa3 - SuffixTree: Don't save entire leaf nodes in advance()
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Thu May 11 22:12:52 PDT 2023
Author: Jessica Paquette
Date: 2023-05-11T22:11:52-07:00
New Revision: 8faffa3cd3e1fb5b1c57d96b3ab58ff4f695d724
URL: https://github.com/llvm/llvm-project/commit/8faffa3cd3e1fb5b1c57d96b3ab58ff4f695d724
DIFF: https://github.com/llvm/llvm-project/commit/8faffa3cd3e1fb5b1c57d96b3ab58ff4f695d724.diff
LOG: SuffixTree: Don't save entire leaf nodes in advance()
All we need is the suffix indices.
Just store those instead.
Also improve code readability a little while we're here.
Added:
Modified:
llvm/include/llvm/Support/SuffixTree.h
Removed:
################################################################################
diff --git a/llvm/include/llvm/Support/SuffixTree.h b/llvm/include/llvm/Support/SuffixTree.h
index d74b7c4661c3..aff6d9bfded1 100644
--- a/llvm/include/llvm/Support/SuffixTree.h
+++ b/llvm/include/llvm/Support/SuffixTree.h
@@ -164,11 +164,11 @@ class SuffixTree {
N = nullptr;
// Each leaf node represents a repeat of a string.
- SmallVector<SuffixTreeLeafNode *> LeafChildren;
+ SmallVector<unsigned> RepeatedSubstringStarts;
// Continue visiting nodes until we find one which repeats more than once.
while (!InternalNodesToVisit.empty()) {
- LeafChildren.clear();
+ RepeatedSubstringStarts.clear();
auto *Curr = InternalNodesToVisit.back();
InternalNodesToVisit.pop_back();
@@ -182,13 +182,17 @@ class SuffixTree {
for (auto &ChildPair : Curr->Children) {
// Save all of this node's children for processing.
if (auto *InternalChild =
- dyn_cast<SuffixTreeInternalNode>(ChildPair.second))
+ dyn_cast<SuffixTreeInternalNode>(ChildPair.second)) {
InternalNodesToVisit.push_back(InternalChild);
+ continue;
+ }
- // It's not an internal node, so it must be a leaf. If we have a
- // long enough string, then save the leaf children.
- else if (Length >= MinLength)
- LeafChildren.push_back(cast<SuffixTreeLeafNode>(ChildPair.second));
+ if (Length < MinLength)
+ continue;
+
+ // Have an occurrence of a potentially repeated string. Save it.
+ auto *Leaf = cast<SuffixTreeLeafNode>(ChildPair.second);
+ RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
}
// The root never represents a repeated substring. If we're looking at
@@ -197,14 +201,15 @@ class SuffixTree {
continue;
// Do we have any repeated substrings?
- if (LeafChildren.size() >= 2) {
- // Yes. Update the state to reflect this, and then bail out.
- N = Curr;
- RS.Length = Length;
- for (SuffixTreeLeafNode *Leaf : LeafChildren)
- RS.StartIndices.push_back(Leaf->getSuffixIdx());
- break;
- }
+ if (RepeatedSubstringStarts.size() < 2)
+ continue;
+
+ // Yes. Update the state to reflect this, and then bail out.
+ N = Curr;
+ RS.Length = Length;
+ for (unsigned StartIdx : RepeatedSubstringStarts)
+ RS.StartIndices.push_back(StartIdx);
+ break;
}
// At this point, either NewRS is an empty RepeatedSubstring, or it was
// set in the above loop. Similarly, N is either nullptr, or the node
More information about the llvm-commits mailing list