[PATCH] D115855: [NFC] Add debug log and some comments for suffixtree

Yifeng Dong via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 15 23:59:00 PST 2021


dongAxis1944 created this revision.
Herald added subscribers: dexonsmith, hiraditya.
dongAxis1944 requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

This is an NFC change for the suffix tree.

We recently started using the MachineOutliner to reduce our binary size, so I have been looking into the suffix tree code.
I found it hard to debug, so this NFC change adds a debug dump and some comments to make that easier.

BTW, I noticed that the array the suffix tree is built from does not end with a terminator.


https://reviews.llvm.org/D115855

Files:
  llvm/include/llvm/Support/SuffixTree.h
  llvm/lib/CodeGen/MachineOutliner.cpp


Index: llvm/lib/CodeGen/MachineOutliner.cpp
===================================================================
--- llvm/lib/CodeGen/MachineOutliner.cpp
+++ llvm/lib/CodeGen/MachineOutliner.cpp
@@ -514,6 +514,7 @@
     InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) {
   FunctionList.clear();
   SuffixTree ST(Mapper.UnsignedVec);
+  LLVM_DEBUG(dbgs() << "Dumping SuffixTree:\n" << ST << "\n");
 
   // First, find all of the repeated substrings in the tree of minimum length
   // 2.
Index: llvm/include/llvm/Support/SuffixTree.h
===================================================================
--- llvm/include/llvm/Support/SuffixTree.h
+++ llvm/include/llvm/Support/SuffixTree.h
@@ -15,6 +15,8 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include <vector>
 
 namespace llvm {
@@ -38,6 +40,8 @@
 /// in \p Link. Each leaf node stores the start index of its respective
 /// suffix in \p SuffixIdx.
 struct SuffixTreeNode {
+  /// The ID of the node.
+  uint64_t ID = 0;
 
   /// The children of this node.
   ///
@@ -137,6 +141,9 @@
 class SuffixTree {
 public:
   /// Each element is an integer representing an instruction in the module.
+  ///
+  /// For the suffix tree to be valid, the last element of this array should
+  /// be unique.
   llvm::ArrayRef<unsigned> Str;
 
   /// A repeated substring in the tree.
@@ -234,6 +241,8 @@
   /// \param Str The string to construct the suffix tree for.
   SuffixTree(const std::vector<unsigned> &Str);
 
+  const SuffixTreeNode *getRoot() const { return Root; }
+
   /// Iterator for finding all repeated substrings in the suffix tree.
   struct RepeatedSubstringIterator {
   private:
@@ -345,6 +354,41 @@
   iterator end() { return iterator(nullptr); }
 };
 
+#if !defined(NDEBUG)
+
+/// Debug-only dump of \p Tree to \p OS: one line per node giving its kind, ID,
+/// StartIdx, EndIdx ("$" if the node has no EndIdx) and its children's IDs.
+/// Nodes are visited depth-first through an explicit worklist (no recursion).
+inline raw_ostream &operator<<(raw_ostream &OS, const SuffixTree &Tree) {
+  std::vector<const SuffixTreeNode *> Worklist;
+  Worklist.push_back(Tree.getRoot());
+  while (!Worklist.empty()) {
+    const SuffixTreeNode *Node = Worklist.back();
+    Worklist.pop_back();
+    if (!Node)
+      continue;
+    // Root takes precedence over Leaf when classifying a node.
+    const char *NodeType =
+        Node->isRoot() ? "Root" : Node->isLeaf() ? "Leaf" : "Internal";
+    // raw_ostream formats integers itself; no std::string temporaries needed.
+    OS << "Node[" << NodeType << "#" << Node->ID
+       << "]: StartIdx=" << Node->StartIdx << ", EndIdx=";
+    if (Node->EndIdx)
+      OS << *Node->EndIdx;
+    else
+      OS << "$";
+    OS << ", Children = [";
+    for (const auto &ChildPair : Node->Children) {
+      OS << ChildPair.second->ID << ",";
+      Worklist.push_back(ChildPair.second);
+    }
+    OS << "]\n";
+  }
+  return OS;
+}
+
+#endif
+
 } // namespace llvm
 
 #endif // LLVM_SUPPORT_SUFFIXTREE_H


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D115855.394761.patch
Type: text/x-patch
Size: 2890 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211216/222e9fda/attachment.bin>


More information about the llvm-commits mailing list