[llvm] [NFC][Support] Add RadixTree (PR #164524)
Florian Mayer via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 23 11:30:18 PDT 2025
================
@@ -0,0 +1,345 @@
+//===-- RadixTree.h - Radix Tree implementation -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//===----------------------------------------------------------------------===//
+//
+// This file implements a Radix Tree.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_RADIXTREE_H
+#define LLVM_SUPPORT_RADIXTREE_H
+
+#include "llvm/ADT/ADL.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <limits>
+#include <list>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+/// \brief A Radix Tree implementation.
+///
+/// A Radix Tree (also known as a compact prefix tree or radix trie) is a
+/// data structure that stores a dynamic set or associative array where keys
+/// are strings and values are associated with these keys. Unlike a regular
+/// trie, the edges of a radix tree can be labeled with sequences of characters
+/// as well as single characters. This makes radix trees more efficient for
+/// storing sparse data sets, where many nodes in a regular trie would have
+/// only one child.
+///
+/// This implementation supports arbitrary key types that can be iterated over
+/// (e.g., `std::string`, `std::vector<char>`, `ArrayRef<char>`). The key type
+/// must provide `begin()` and `end()` for iteration.
+///
+/// The tree stores `std::pair<const KeyType, T>` as its value type.
+///
+/// Example usage:
+/// \code
+/// llvm::RadixTree<StringRef, int> Tree;
+/// Tree.emplace("apple", 1);
+/// Tree.emplace("grapefruit", 2);
+/// Tree.emplace("grape", 3);
+///
+/// // Find prefixes
+/// for (const auto &pair : Tree.find_prefixes("grapefruit juice")) {
+/// // pair will be {"grape", 3}
+/// // pair will be {"grapefruit", 2}
+/// llvm::outs() << pair.first << ": " << pair.second << "\n";
+/// }
+///
+/// // Iterate over all elements
+/// for (const auto &pair : Tree) {
+/// llvm::outs() << pair.first << ": " << pair.second << "\n";
+/// }
+/// \endcode
+///
+/// \note
+/// The `RadixTree` takes ownership of the `KeyType` and `T` objects
+/// inserted into it. When an element is removed or the tree is destroyed,
+/// these objects will be destructed.
+/// However, if `KeyType` is a reference-like type (e.g. `StringRef` or a
+/// range), the user must guarantee that the referenced data outlives the tree.
+template <typename KeyType, typename T> class RadixTree {
+public:
+ using key_type = KeyType;
+ using mapped_type = T;
+ using value_type = std::pair<const KeyType, mapped_type>;
+
+private:
+ using KeyConstIteratorType =
+ decltype(adl_begin(std::declval<const key_type &>()));
+ using KeyConstIteratorRangeType = iterator_range<KeyConstIteratorType>;
+ using KeyValueType =
+ remove_cvref_t<decltype(*adl_begin(std::declval<key_type &>()))>;
+ using ContainerType = std::list<value_type>;
+
+ /// Represents an internal node in the Radix Tree.
+ ///
+ /// A node holds the key fragment that labels it, its child nodes, and an
+ /// iterator to the value associated with it (if any). Nodes are move-only:
+ /// copy construction and copy assignment are deleted.
+ struct Node {
+   /// The key fragment labelling this node. A default-constructed node holds
+   /// an empty range; `findChild` asserts that child nodes never do.
+   KeyConstIteratorRangeType Key = {KeyConstIteratorType{},
+                                    KeyConstIteratorType{}};
+
+   /// Child nodes. `findChild` selects among them by `KeyFront`.
+   std::vector<Node> Children;
+
+   /// An iterator to the value associated with this node.
+   ///
+   /// If this node does not have a value (i.e., it's an internal node that
+   /// only serves as a path to other values), this iterator will be equal
+   /// to default constructed `ContainerType::iterator()`.
+   ///
+   /// Value-initialized explicitly so that the "no value" state does not
+   /// depend on what default-initialization does for the iterator type.
+   typename ContainerType::iterator Value{};
+
+   /// The first element of `Key`. Used for fast child lookup.
+   ///
+   /// Value-initialized so that a default-constructed node never carries an
+   /// indeterminate value, which the defaulted move operations would
+   /// otherwise copy.
+   KeyValueType KeyFront{};
+
+   /// Creates a node with an empty key and no value.
+   Node() = default;
+
+   /// Creates a node labelled with the key fragment \p Key.
+   ///
+   /// \param Key The key fragment for this node; must not be empty.
+   Node(const KeyConstIteratorRangeType &Key)
+       : Key(Key), KeyFront(firstElement(Key)) {}
+
+   Node(Node &&) = default;
+   Node &operator=(Node &&) = default;
+
+   Node(const Node &) = delete;
+   Node &operator=(const Node &) = delete;
+
+   /// Finds the child whose key starts with the first element of \p Key.
+   ///
+   /// Only the first element is compared; the remainder of the child's key
+   /// is not checked here.
+   ///
+   /// \param Key The remaining query range.
+   /// \returns The matching child, or nullptr if \p Key is empty or no child
+   /// starts with its first element.
+   const Node *findChild(const KeyConstIteratorRangeType &Key) const {
+     if (Key.empty())
+       return nullptr;
+     for (const auto &Child : Children) {
+       assert(!Child.Key.empty()); // Only root can be empty.
+       if (Child.KeyFront == *Key.begin())
+         return &Child;
+     }
+     return nullptr;
+   }
+
+   /// Non-const overload; forwards to the const `findChild` and casts the
+   /// constness of the result away.
+   Node *findChild(const KeyConstIteratorRangeType &Query) {
+     const Node *This = this;
+     return const_cast<Node *>(This->findChild(Query));
+   }
+
+   /// Returns the number of nodes in the subtree rooted at this node,
+   /// counting this node itself.
+   size_t countNodes() const {
+     size_t R = 1;
+     for (const auto &C : Children)
+       R += C.countNodes();
+     return R;
+   }
+
+   /// Splits the current node into two.
+   ///
+   /// This function is used when a new key needs to be inserted that shares
+   /// a common prefix with the current node's key, but then diverges.
+   /// The current `Key` is truncated to the common prefix, and a new child
+   /// node is created for the remainder of the original node's `Key`.
+   ///
+   /// After the call this node has exactly one child. That child takes over
+   /// the previous children and the previous `Value`, and this node no
+   /// longer has a value.
+   ///
+   /// \param SplitPoint An iterator pointing to the character in the current
+   /// `Key` where the split should occur. It must not be `Key.end()`, because
+   /// the new child's key would then be empty.
+   void split(KeyConstIteratorType SplitPoint) {
+     Node Child(make_range(SplitPoint, Key.end()));
+     Key = make_range(Key.begin(), SplitPoint);
+
+     // Child starts with no children and no value, so these swaps hand the
+     // existing subtree and value over to it and leave this node without
+     // either.
+     Children.swap(Child.Children);
+     std::swap(Value, Child.Value);
+
+     Children.emplace_back(std::move(Child));
+   }
+
+ private:
+   /// Returns the first element of \p Range, asserting beforehand that the
+   /// range is not empty. Used by the constructor so that the check runs
+   /// before the iterator is dereferenced rather than after.
+   static KeyValueType firstElement(const KeyConstIteratorRangeType &Range) {
+     assert(!Range.empty() && "Node key must not be empty");
+     return *Range.begin();
+   }
+ };
+
+ Node Root; // Root is always for empty range.
----------------
fmayer wrote:
I am not sure what "Root is always for empty range." is trying to tell me.
https://github.com/llvm/llvm-project/pull/164524
More information about the llvm-commits
mailing list