[llvm] 8faffa3 - SuffixTree: Don't save entire leaf nodes in advance()

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Thu May 11 22:12:52 PDT 2023


Author: Jessica Paquette
Date: 2023-05-11T22:11:52-07:00
New Revision: 8faffa3cd3e1fb5b1c57d96b3ab58ff4f695d724

URL: https://github.com/llvm/llvm-project/commit/8faffa3cd3e1fb5b1c57d96b3ab58ff4f695d724
DIFF: https://github.com/llvm/llvm-project/commit/8faffa3cd3e1fb5b1c57d96b3ab58ff4f695d724.diff

LOG: SuffixTree: Don't save entire leaf nodes in advance()

All we need is the suffix indices.

Just store those instead.

Also improve code readability a little while we're here.

Added: 
    

Modified: 
    llvm/include/llvm/Support/SuffixTree.h

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/SuffixTree.h b/llvm/include/llvm/Support/SuffixTree.h
index d74b7c4661c3..aff6d9bfded1 100644
--- a/llvm/include/llvm/Support/SuffixTree.h
+++ b/llvm/include/llvm/Support/SuffixTree.h
@@ -164,11 +164,11 @@ class SuffixTree {
       N = nullptr;
 
       // Each leaf node represents a repeat of a string.
-      SmallVector<SuffixTreeLeafNode *> LeafChildren;
+      SmallVector<unsigned> RepeatedSubstringStarts;
 
       // Continue visiting nodes until we find one which repeats more than once.
       while (!InternalNodesToVisit.empty()) {
-        LeafChildren.clear();
+        RepeatedSubstringStarts.clear();
         auto *Curr = InternalNodesToVisit.back();
         InternalNodesToVisit.pop_back();
 
@@ -182,13 +182,17 @@ class SuffixTree {
         for (auto &ChildPair : Curr->Children) {
           // Save all of this node's children for processing.
           if (auto *InternalChild =
-                  dyn_cast<SuffixTreeInternalNode>(ChildPair.second))
+                  dyn_cast<SuffixTreeInternalNode>(ChildPair.second)) {
             InternalNodesToVisit.push_back(InternalChild);
+            continue;
+          }
 
-          // It's not an internal node, so it must be a leaf. If we have a
-          // long enough string, then save the leaf children.
-          else if (Length >= MinLength)
-            LeafChildren.push_back(cast<SuffixTreeLeafNode>(ChildPair.second));
+          if (Length < MinLength)
+            continue;
+
+          // Have an occurrence of a potentially repeated string. Save it.
+          auto *Leaf = cast<SuffixTreeLeafNode>(ChildPair.second);
+          RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
         }
 
         // The root never represents a repeated substring. If we're looking at
@@ -197,14 +201,15 @@ class SuffixTree {
           continue;
 
         // Do we have any repeated substrings?
-        if (LeafChildren.size() >= 2) {
-          // Yes. Update the state to reflect this, and then bail out.
-          N = Curr;
-          RS.Length = Length;
-          for (SuffixTreeLeafNode *Leaf : LeafChildren)
-            RS.StartIndices.push_back(Leaf->getSuffixIdx());
-          break;
-        }
+        if (RepeatedSubstringStarts.size() < 2)
+          continue;
+
+        // Yes. Update the state to reflect this, and then bail out.
+        N = Curr;
+        RS.Length = Length;
+        for (unsigned StartIdx : RepeatedSubstringStarts)
+          RS.StartIndices.push_back(StartIdx);
+        break;
       }
       // At this point, either NewRS is an empty RepeatedSubstring, or it was
       // set in the above loop. Similarly, N is either nullptr, or the node


        


More information about the llvm-commits mailing list