[PATCH] D115855: [NFC] Add debug log and some comments for suffixtree
Yifeng Dong via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 15 23:59:00 PST 2021
dongAxis1944 created this revision.
Herald added subscribers: dexonsmith, hiraditya.
dongAxis1944 requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
This is an NFC change for the suffix tree.
We recently started using the MachineOutliner to reduce our binary size, so I have been looking into the suffix tree code.
I found it hard to debug, so this NFC change adds a debug dump and some comments to make that easier.
BTW, I found that the array used to build the suffix tree does not have a terminator.
https://reviews.llvm.org/D115855
Files:
llvm/include/llvm/Support/SuffixTree.h
llvm/lib/CodeGen/MachineOutliner.cpp
Index: llvm/lib/CodeGen/MachineOutliner.cpp
===================================================================
--- llvm/lib/CodeGen/MachineOutliner.cpp
+++ llvm/lib/CodeGen/MachineOutliner.cpp
@@ -514,6 +514,7 @@
InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) {
FunctionList.clear();
SuffixTree ST(Mapper.UnsignedVec);
+ LLVM_DEBUG(dbgs() << "Dumping SuffixTree:\n" << ST << "\n");
// First, find all of the repeated substrings in the tree of minimum length
// 2.
Index: llvm/include/llvm/Support/SuffixTree.h
===================================================================
--- llvm/include/llvm/Support/SuffixTree.h
+++ llvm/include/llvm/Support/SuffixTree.h
@@ -15,6 +15,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <vector>
namespace llvm {
@@ -38,6 +40,8 @@
/// in \p Link. Each leaf node stores the start index of its respective
/// suffix in \p SuffixIdx.
struct SuffixTreeNode {
+ // The ID of the node
+ uint64_t ID = 0;
/// The children of this node.
///
@@ -137,6 +141,9 @@
class SuffixTree {
public:
/// Each element is an integer representing an instruction in the module.
+ ///
+ /// To build a valid suffix tree, the last element of this array should be
+ /// unique within the array.
llvm::ArrayRef<unsigned> Str;
/// A repeated substring in the tree.
@@ -234,6 +241,8 @@
/// \param Str The string to construct the suffix tree for.
SuffixTree(const std::vector<unsigned> &Str);
+ const SuffixTreeNode *getRoot() const { return Root; }
+
/// Iterator for finding all repeated substrings in the suffix tree.
struct RepeatedSubstringIterator {
private:
@@ -345,6 +354,41 @@
iterator end() { return iterator(nullptr); }
};
+#if !defined(NDEBUG)
+
+inline raw_ostream &operator<<(raw_ostream &OS, const SuffixTree &Tree) { // Dumps the tree to OS: one line per node reachable from the root.
+ std::vector<const SuffixTreeNode *> Worklist; // Explicit LIFO stack, so the traversal is iterative rather than recursive.
+ Worklist.push_back(Tree.getRoot());
+ while (!Worklist.empty()) {
+ const SuffixTreeNode *Node = Worklist.back();
+ Worklist.pop_back();
+
+ if (!Node) // Defensive: ignore null entries (e.g. a null root pointer).
+ continue;
+
+ std::string NodeType = "Internal"; // Default label; overridden below for the root and for leaves.
+ if (Node->isRoot()) {
+ NodeType = "Root";
+ } else if (Node->isLeaf()) {
+ NodeType = "Leaf";
+ }
+
+ std::string EndIdxValue =
+ Node->EndIdx ? std::to_string(*Node->EndIdx) : "$"; // EndIdx is a pointer; "$" is printed when it is null.
+ OS << "Node[" << NodeType << "#" << std::to_string(Node->ID)
+ << "]: StartIdx=" << Node->StartIdx << ", EndIdx=" << EndIdxValue;
+ OS << ", Childrens = [";
+ for (auto &ChildPair : Node->Children) { // Print each child's ID (followed by a comma) and schedule the child for a later visit.
+ OS << ChildPair.second->ID << ",";
+ Worklist.push_back(ChildPair.second);
+ }
+ OS << "]\n";
+ }
+ return OS; // Return the stream so calls can be chained.
+}
+
+#endif
+
} // namespace llvm
#endif // LLVM_SUPPORT_SUFFIXTREE_H
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D115855.394761.patch
Type: text/x-patch
Size: 2890 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211216/222e9fda/attachment.bin>
More information about the llvm-commits
mailing list