[llvm] [SpecialCaseList] Filtering Globs with matching prefix (PR #164531)
Vitaly Buka via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 24 22:02:58 PDT 2025
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/164531
>From b386ed04a29d51c6f4e322d5cb5ca4ae484d1afa Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Tue, 21 Oct 2025 19:00:07 -0700
Subject: [PATCH 1/5] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
=?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.7
[skip ci]
---
llvm/include/llvm/Support/GlobPattern.h | 22 +-
llvm/include/llvm/Support/RadixTree.h | 345 +++++++++++++++++++
llvm/lib/Support/GlobPattern.cpp | 67 +++-
llvm/unittests/Support/CMakeLists.txt | 1 +
llvm/unittests/Support/GlobPatternTest.cpp | 66 ++++
llvm/unittests/Support/RadixTreeTest.cpp | 372 +++++++++++++++++++++
6 files changed, 858 insertions(+), 15 deletions(-)
create mode 100644 llvm/include/llvm/Support/RadixTree.h
create mode 100644 llvm/unittests/Support/RadixTreeTest.cpp
diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h
index c1b44849b9794..8b8ac89304e31 100644
--- a/llvm/include/llvm/Support/GlobPattern.h
+++ b/llvm/include/llvm/Support/GlobPattern.h
@@ -63,22 +63,30 @@ class GlobPattern {
// Returns true for glob pattern "*". Can be used to avoid expensive
// preparation/acquisition of the input for match().
bool isTrivialMatchAll() const {
- if (!Prefix.empty())
+ if (PrefixSize)
return false;
- if (!Suffix.empty())
+ if (SuffixSize)
return false;
if (SubGlobs.size() != 1)
return false;
return SubGlobs[0].getPat() == "*";
}
- StringRef prefix() const { return Prefix; }
- StringRef suffix() const { return Suffix; }
+ // The following functions are just shortcuts for faster matching. They are
+ // conservative to simplify implementations.
-private:
- StringRef Prefix;
- StringRef Suffix;
+ // Returns plain prefix of the pattern.
+ StringRef prefix() const { return Pattern.take_front(PrefixSize); }
+ // Returns plain suffix of the pattern.
+ StringRef suffix() const { return Pattern.take_back(SuffixSize); }
+ // Returns the longest plain substring of the pattern between prefix and
+ // suffix.
+ StringRef longest_substr() const;
+private:
+ StringRef Pattern;
+ size_t PrefixSize = 0;
+ size_t SuffixSize = 0;
struct SubGlobPattern {
/// \param Pat the pattern to match against
LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat);
diff --git a/llvm/include/llvm/Support/RadixTree.h b/llvm/include/llvm/Support/RadixTree.h
new file mode 100644
index 0000000000000..05697fc267628
--- /dev/null
+++ b/llvm/include/llvm/Support/RadixTree.h
@@ -0,0 +1,345 @@
+//===-- RadixTree.h - Radix Tree implementation -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//===----------------------------------------------------------------------===//
+//
+// This file implements a Radix Tree.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_RADIXTREE_H
+#define LLVM_SUPPORT_RADIXTREE_H
+
+#include "llvm/ADT/ADL.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <limits>
+#include <list>
+#include <utility>
+
+namespace llvm {
+
+/// \brief A Radix Tree implementation.
+///
+/// A Radix Tree (also known as a compact prefix tree or radix trie) is a
+/// data structure that stores a dynamic set or associative array where keys
+/// are strings and values are associated with these keys. Unlike a regular
+/// trie, the edges of a radix tree can be labeled with sequences of characters
+/// as well as single characters. This makes radix trees more efficient for
+/// storing sparse data sets, where many nodes in a regular trie would have
+/// only one child.
+///
+/// This implementation supports arbitrary key types that can be iterated over
+/// (e.g., `std::string`, `std::vector<char>`, `ArrayRef<char>`). The key type
+/// must provide `begin()` and `end()` for iteration.
+///
+/// The tree stores `std::pair<const KeyType, T>` as its value type.
+///
+/// Example usage:
+/// \code
+/// llvm::RadixTree<StringRef, int> Tree;
+/// Tree.emplace("apple", 1);
+/// Tree.emplace("grapefruit", 2);
+/// Tree.emplace("grape", 3);
+///
+/// // Find prefixes
+/// for (const auto &pair : Tree.find_prefixes("grapefruit juice")) {
+/// // pair will be {"grape", 3}
+/// // pair will be {"grapefruit", 2}
+/// llvm::outs() << pair.first << ": " << pair.second << "\n";
+/// }
+///
+/// // Iterate over all elements
+/// for (const auto &pair : Tree) {
+/// llvm::outs() << pair.first << ": " << pair.second << "\n";
+/// }
+/// \endcode
+///
+/// \note
+/// The `RadixTree` takes ownership of the `KeyType` and `T` objects
+/// inserted into it. When an element is removed or the tree is destroyed,
+/// these objects will be destructed.
+/// However, if `KeyType` is a reference-like type, e.g. StringRef or range,
+/// User must guarantee that destination has lifetime longer than the tree.
+template <typename KeyType, typename T> class RadixTree {
+public:
+ using key_type = KeyType;
+ using mapped_type = T;
+ using value_type = std::pair<const KeyType, mapped_type>;
+
+private:
+ using KeyConstIteratorType =
+ decltype(adl_begin(std::declval<const key_type &>()));
+ using KeyConstIteratorRangeType = iterator_range<KeyConstIteratorType>;
+ using KeyValueType =
+ remove_cvref_t<decltype(*adl_begin(std::declval<key_type &>()))>;
+ using ContainerType = std::list<value_type>;
+
+ /// Represents an internal node in the Radix Tree.
+ struct Node {
+ KeyConstIteratorRangeType Key = {KeyConstIteratorType{},
+ KeyConstIteratorType{}};
+ std::vector<Node> Children;
+
+ /// An iterator to the value associated with this node.
+ ///
+ /// If this node does not have a value (i.e., it's an internal node that
+ /// only serves as a path to other values), this iterator will be equal
+ /// to default constructed `ContainerType::iterator()`.
+ typename ContainerType::iterator Value;
+
+ /// The first character of the Key. Used for fast child lookup.
+ KeyValueType KeyFront;
+
+ Node() = default;
+ Node(const KeyConstIteratorRangeType &Key)
+ : Key(Key), KeyFront(*Key.begin()) {
+ assert(!Key.empty());
+ }
+
+ Node(Node &&) = default;
+ Node &operator=(Node &&) = default;
+
+ Node(const Node &) = delete;
+ Node &operator=(const Node &) = delete;
+
+ const Node *findChild(const KeyConstIteratorRangeType &Key) const {
+ if (Key.empty())
+ return nullptr;
+ for (const auto &Child : Children) {
+ assert(!Child.Key.empty()); // Only root can be empty.
+ if (Child.KeyFront == *Key.begin())
+ return &Child;
+ }
+ return nullptr;
+ }
+
+ Node *findChild(const KeyConstIteratorRangeType &Query) {
+ const Node *This = this;
+ return const_cast<Node *>(This->findChild(Query));
+ }
+
+ size_t countNodes() const {
+ size_t R = 1;
+ for (const auto &C : Children)
+ R += C.countNodes();
+ return R;
+ }
+
+ ///
+ /// Splits the current node into two.
+ ///
+ /// This function is used when a new key needs to be inserted that shares
+ /// a common prefix with the current node's key, but then diverges.
+ /// The current `Key` is truncated to the common prefix, and a new child
+ /// node is created for the remainder of the original node's `Key`.
+ ///
+ /// \param SplitPoint An iterator pointing to the character in the current
+ /// `Key` where the split should occur.
+ void split(KeyConstIteratorType SplitPoint) {
+ Node Child(make_range(SplitPoint, Key.end()));
+ Key = make_range(Key.begin(), SplitPoint);
+
+ Children.swap(Child.Children);
+ std::swap(Value, Child.Value);
+
+ Children.emplace_back(std::move(Child));
+ }
+ };
+
+ Node Root; // Root is always for empty range.
+ ContainerType Values;
+
+ /// Finds or creates a new tail or leaf node corresponding to the `Key`.
+ Node &findOrCreate(KeyConstIteratorRangeType Key) {
+ Node *Curr = &Root;
+ if (Key.empty())
+ return *Curr;
+
+ for (;;) {
+ auto [I1, I2] = llvm::mismatch(Key, Curr->Key);
+ Key = make_range(I1, Key.end());
+
+ if (I2 != Curr->Key.end()) {
+ // Match is partial. Either query is too short, or there is mismatching
+ // character. Split either way, and put new node in between of the
+ // current and its children.
+ Curr->split(I2);
+
+ // Split was caused by mismatch, so `findChild` will fail.
+ break;
+ }
+
+ Node *Child = Curr->findChild(Key);
+ if (!Child)
+ break;
+
+ // Move to child with the same first character.
+ Curr = Child;
+ }
+
+ if (Key.empty()) {
+ // The current node completely matches the key, return it.
+ return *Curr;
+ }
+
+ // `Key` a suffix of original `Key` unmatched by path from the `Root` to the
+ // `Curr`, and we have no candidate in the children to match more. Create a
+ // new one.
+ return Curr->Children.emplace_back(Key);
+ }
+
+ ///
+ /// An iterator for traversing prefixes search results.
+ ///
+ /// This iterator is used by `find_prefixes` to traverse the tree and find
+ /// elements that are prefixes to the given key. It's a forward iterator.
+ ///
+ /// \tparam MappedType The type of the value pointed to by the iterator.
+ /// This will be `value_type` for non-const iterators
+ /// and `const value_type` for const iterators.
+ template <typename MappedType>
+ class IteratorImpl
+ : public iterator_facade_base<IteratorImpl<MappedType>,
+ std::forward_iterator_tag, MappedType> {
+ const Node *Curr = nullptr;
+ KeyConstIteratorRangeType Query;
+
+ void findNextValid() {
+ while (Curr && Curr->Value == typename ContainerType::iterator())
+ advance();
+ }
+
+ void advance() {
+ assert(Curr);
+ if (Query.empty()) {
+ Curr = nullptr;
+ return;
+ }
+
+ Curr = Curr->findChild(Query);
+ if (!Curr) {
+ Curr = nullptr;
+ return;
+ }
+
+ auto [I1, I2] = llvm::mismatch(Query, Curr->Key);
+ if (I2 != Curr->Key.end()) {
+ Curr = nullptr;
+ return;
+ }
+ Query = make_range(I1, Query.end());
+ }
+
+ friend class RadixTree;
+ IteratorImpl(const Node *C, const KeyConstIteratorRangeType &Q)
+ : Curr(C), Query(Q) {
+ findNextValid();
+ }
+
+ public:
+ IteratorImpl() : Query{{}, {}} {}
+
+ MappedType &operator*() const { return *Curr->Value; }
+
+ IteratorImpl &operator++() {
+ advance();
+ findNextValid();
+ return *this;
+ }
+
+ bool operator==(const IteratorImpl &Other) const {
+ return Curr == Other.Curr;
+ }
+ };
+
+public:
+ RadixTree() = default;
+ RadixTree(RadixTree &&) = default;
+ RadixTree &operator=(RadixTree &&) = default;
+
+ using prefix_iterator = IteratorImpl<value_type>;
+ using const_prefix_iterator = IteratorImpl<const value_type>;
+
+ using iterator = typename ContainerType::iterator;
+ using const_iterator = typename ContainerType::const_iterator;
+
+ /// Returns true if the tree is empty.
+ bool empty() const { return Values.empty(); }
+
+ /// Returns the number of elements in the tree.
+ size_t size() const { return Values.size(); }
+
+ /// Returns the number of nodes in the tree.
+ ///
+ /// This function counts all internal nodes in the tree. It can be useful for
+ /// understanding the memory footprint or complexity of the tree structure.
+ size_t countNodes() const { return Root.countNodes(); }
+
+ /// Returns an iterator to the first element.
+ iterator begin() { return Values.begin(); }
+ const_iterator begin() const { return Values.begin(); }
+
+ /// Returns an iterator to the end of the tree.
+ iterator end() { return Values.end(); }
+ const_iterator end() const { return Values.end(); }
+
+ /// Constructs and inserts a new element into the tree.
+ ///
+ /// This function constructs an element in-place within the tree. If an
+ /// element with the same key already exists, the insertion fails and the
+ /// function returns an iterator to the existing element along with `false`.
+ /// Otherwise, the new element is inserted and the function returns an
+ /// iterator to the new element along with `true`.
+ ///
+ /// \param Key The key of the element to construct.
+ /// \param Args Arguments to forward to the constructor of the mapped_type.
+ /// \return A pair consisting of an iterator to the inserted element (or to
+ /// the element that prevented insertion) and a boolean value
+ /// indicating whether the insertion took place.
+ template <typename... Ts>
+ std::pair<iterator, bool> emplace(key_type &&Key, Ts &&...Args) {
+ const value_type &NewValue =
+ Values.emplace_front(std::move(Key), T(std::move(Args)...));
+ Node &Node = findOrCreate(NewValue.first);
+ bool HasValue = Node.Value != typename ContainerType::iterator();
+ if (!HasValue) {
+ Node.Value = Values.begin();
+ } else {
+ Values.pop_front();
+ }
+ return std::make_pair(Node.Value, !HasValue);
+ }
+
+ ///
+ /// Finds all elements whose keys are prefixes of the given `Key`.
+ ///
+ /// This function returns an iterator range over all elements in the tree
+ /// whose keys are prefixes of the provided `Key`. For example, if the tree
+ /// contains "abcde", "abc", "abcdefgh", and `Key` is "abcde", this function
+ /// would return iterators to "abcde" and "abc".
+ ///
+ /// \param Key The key to search for prefixes of.
+ /// \return An `iterator_range` of `const_prefix_iterator`s, allowing
+ /// iteration over the found prefix elements.
+ /// \note The returned iterators reference the `Key` provided by the caller.
+ /// The caller must ensure that `Key` remains valid for the lifetime
+ /// of the iterators.
+ iterator_range<const_prefix_iterator>
+ find_prefixes(const key_type &Key) const {
+ return iterator_range<const_prefix_iterator>{
+ const_prefix_iterator(
+ &Root, KeyConstIteratorRangeType{adl_begin(Key), adl_end(Key)}),
+ const_prefix_iterator{}};
+ }
+};
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_RADIXTREE_H
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp
index 0ecf47dc1d3d1..2715229c65be1 100644
--- a/llvm/lib/Support/GlobPattern.cpp
+++ b/llvm/lib/Support/GlobPattern.cpp
@@ -132,24 +132,70 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) {
return std::move(SubPatterns);
}
+static StringRef maxPlainSubstring(StringRef S) {
+ StringRef Best;
+ while (!S.empty()) {
+ size_t PrefixSize = S.find_first_of("?*[{\\");
+ if (PrefixSize == std::string::npos)
+ PrefixSize = S.size();
+
+ if (Best.size() < PrefixSize)
+ Best = S.take_front(PrefixSize);
+
+ S = S.drop_front(PrefixSize);
+
+ // It's impossible, as the first and last characters of the input string
+ // must be Glob special characters, otherwise they would be parts of
+ // the prefix or the suffix.
+ assert(!S.empty());
+
+ switch (S.front()) {
+ case '\\':
+ S = S.drop_front(2);
+ break;
+ case '[': {
+ // Drop '[' and the first character which can be ']'.
+ S = S.drop_front(2);
+ size_t EndBracket = S.find_first_of("]");
+ // Should not be possible, SubGlobPattern::create should fail on invalid
+ // pattern before we get here.
+ assert(EndBracket != std::string::npos);
+ S = S.drop_front(EndBracket + 1);
+ break;
+ }
+ case '{':
+ // TODO: implement.
+ // Fallback to whatever is best for now.
+ return Best;
+ default:
+ S = S.drop_front(1);
+ }
+ }
+
+ return Best;
+}
+
Expected<GlobPattern>
GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) {
GlobPattern Pat;
+ Pat.Pattern = S;
// Store the prefix that does not contain any metacharacter.
- size_t PrefixSize = S.find_first_of("?*[{\\");
- Pat.Prefix = S.substr(0, PrefixSize);
- if (PrefixSize == std::string::npos)
+ Pat.PrefixSize = S.find_first_of("?*[{\\");
+ if (Pat.PrefixSize == std::string::npos) {
+ Pat.PrefixSize = S.size();
return Pat;
- S = S.substr(PrefixSize);
+ }
+ S = S.substr(Pat.PrefixSize);
// Just in case we stop on unmatched opening brackets.
size_t SuffixStart = S.find_last_of("?*[]{}\\");
assert(SuffixStart != std::string::npos);
if (S[SuffixStart] == '\\')
++SuffixStart;
- ++SuffixStart;
- Pat.Suffix = S.substr(SuffixStart);
+ if (SuffixStart < S.size())
+ ++SuffixStart;
+ Pat.SuffixSize = S.size() - SuffixStart;
S = S.substr(0, SuffixStart);
SmallVector<std::string, 1> SubPats;
@@ -199,10 +245,15 @@ GlobPattern::SubGlobPattern::create(StringRef S) {
return Pat;
}
+StringRef GlobPattern::longest_substr() const {
+ return maxPlainSubstring(
+ Pattern.drop_front(PrefixSize).drop_back(SuffixSize));
+}
+
bool GlobPattern::match(StringRef S) const {
- if (!S.consume_front(Prefix))
+ if (!S.consume_front(prefix()))
return false;
- if (!S.consume_back(Suffix))
+ if (!S.consume_back(suffix()))
return false;
if (SubGlobs.empty() && S.empty())
return true;
diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt
index 21f10eb610f11..80646cfc0ef1f 100644
--- a/llvm/unittests/Support/CMakeLists.txt
+++ b/llvm/unittests/Support/CMakeLists.txt
@@ -76,6 +76,7 @@ add_llvm_unittest(SupportTests
ProcessTest.cpp
ProgramTest.cpp
ProgramStackTest.cpp
+ RadixTreeTest.cpp
RecyclerTest.cpp
RegexTest.cpp
ReverseIterationTest.cpp
diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp
index 58fd7678131c6..872a21e948d7a 100644
--- a/llvm/unittests/Support/GlobPatternTest.cpp
+++ b/llvm/unittests/Support/GlobPatternTest.cpp
@@ -329,6 +329,72 @@ TEST_F(GlobPatternTest, PrefixSuffix) {
EXPECT_EQ("cd", Pat->suffix());
}
+TEST_F(GlobPatternTest, Substr) {
+ auto Pat = GlobPattern::create("");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("abcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("*abcd");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("abcd*");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc*d");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bc", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc*def*g");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("def", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcd*ef*g");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcd", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcd*efg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcd", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcd[ef]g*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcd", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc[d]efg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("efg", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bc[]]efg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("efg", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcde\\fg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcde", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcde\\[fg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcde", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcde?fg*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcde", Pat->longest_substr());
+
+ Pat = GlobPattern::create("a*bcdef{g}*h");
+ ASSERT_TRUE((bool)Pat);
+ EXPECT_EQ("bcdef", Pat->longest_substr());
+}
+
TEST_F(GlobPatternTest, Pathological) {
std::string P, S(40, 'a');
StringRef Pieces[] = {"a*", "[ba]*", "{b*,a*}*"};
diff --git a/llvm/unittests/Support/RadixTreeTest.cpp b/llvm/unittests/Support/RadixTreeTest.cpp
new file mode 100644
index 0000000000000..e94a40eaf0264
--- /dev/null
+++ b/llvm/unittests/Support/RadixTreeTest.cpp
@@ -0,0 +1,372 @@
+//===- llvm/unittest/Support/RadixTreeTypeTest.cpp ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/RadixTree.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <iterator>
+#include <list>
+#include <vector>
+
+using namespace llvm;
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
+using ::testing::Pair;
+using ::testing::UnorderedElementsAre;
+
+// Test with StringRef.
+
+TEST(RadixTreeTest, Empty) {
+ RadixTree<StringRef, int> T;
+ EXPECT_TRUE(T.empty());
+ EXPECT_EQ(0u, T.size());
+
+ EXPECT_TRUE(T.find_prefixes("").empty());
+ EXPECT_TRUE(T.find_prefixes("A").empty());
+
+ EXPECT_EQ(1u, T.countNodes());
+}
+
+TEST(RadixTreeTest, InsertEmpty) {
+ RadixTree<StringRef, int> T;
+ auto [It, IsNew] = T.emplace("", 4);
+ EXPECT_TRUE(!T.empty());
+ EXPECT_EQ(1u, T.size());
+ EXPECT_TRUE(IsNew);
+ const auto &[K, V] = *It;
+ EXPECT_TRUE(K.empty());
+ EXPECT_EQ(4, V);
+
+ EXPECT_THAT(T, ElementsAre(Pair("", 4)));
+
+ EXPECT_THAT(T.find_prefixes(""), ElementsAre(Pair("", 4)));
+
+ EXPECT_THAT(T.find_prefixes("a"), ElementsAre(Pair("", 4)));
+
+ EXPECT_EQ(1u, T.countNodes());
+}
+
+TEST(RadixTreeTest, Complex) {
+ RadixTree<StringRef, int> T;
+ T.emplace("abcd", 1);
+ EXPECT_EQ(2u, T.countNodes());
+ T.emplace("abklm", 2);
+ EXPECT_EQ(4u, T.countNodes());
+ T.emplace("123abklm", 3);
+ EXPECT_EQ(5u, T.countNodes());
+ T.emplace("123abklm", 4);
+ EXPECT_EQ(5u, T.countNodes());
+ T.emplace("ab", 5);
+ EXPECT_EQ(5u, T.countNodes());
+ T.emplace("1234567", 6);
+ EXPECT_EQ(7u, T.countNodes());
+ T.emplace("123456", 7);
+ EXPECT_EQ(8u, T.countNodes());
+ T.emplace("123456789", 8);
+ EXPECT_EQ(9u, T.countNodes());
+
+ EXPECT_THAT(T, UnorderedElementsAre(Pair("abcd", 1), Pair("abklm", 2),
+ Pair("123abklm", 3), Pair("ab", 5),
+ Pair("1234567", 6), Pair("123456", 7),
+ Pair("123456789", 8)));
+
+ EXPECT_THAT(T.find_prefixes("1234567890"),
+ UnorderedElementsAre(Pair("1234567", 6), Pair("123456", 7),
+ Pair("123456789", 8)));
+
+ EXPECT_THAT(T.find_prefixes("123abklm"),
+ UnorderedElementsAre(Pair("123abklm", 3)));
+
+ EXPECT_THAT(T.find_prefixes("abcdefg"),
+ UnorderedElementsAre(Pair("abcd", 1), Pair("ab", 5)));
+
+ EXPECT_EQ(9u, T.countNodes());
+}
+
+// Test different types, less readable.
+
+template <typename T> struct TestData {
+ static const T Data1[];
+ static const T Data2[];
+};
+
+template <> const char TestData<char>::Data1[] = "abcdedcba";
+template <> const char TestData<char>::Data2[] = "abCDEDCba";
+
+template <> const int TestData<int>::Data1[] = {1, 2, 3, 4, 5, 4, 3, 2, 1};
+template <> const int TestData<int>::Data2[] = {1, 2, 4, 8, 16, 8, 4, 2, 1};
+
+template <typename T> class RadixTreeTypeTest : public ::testing::Test {
+public:
+ using IteratorType = decltype(adl_begin(std::declval<const T &>()));
+ using CharType = remove_cvref_t<decltype(*adl_begin(std::declval<T &>()))>;
+
+ T make(const CharType *Data, size_t N) { return T(StringRef(Data, N)); }
+
+ T make1(size_t N) { return make(TestData<CharType>::Data1, N); }
+ T make2(size_t N) { return make(TestData<CharType>::Data2, N); }
+};
+
+template <>
+iterator_range<StringRef::const_iterator>
+RadixTreeTypeTest<iterator_range<StringRef::const_iterator>>::make(
+ const char *Data, size_t N) {
+ return StringRef(Data).take_front(N);
+}
+
+template <>
+iterator_range<StringRef::const_reverse_iterator>
+RadixTreeTypeTest<iterator_range<StringRef::const_reverse_iterator>>::make(
+ const char *Data, size_t N) {
+ return reverse(StringRef(Data).take_back(N));
+}
+
+template <>
+ArrayRef<int> RadixTreeTypeTest<ArrayRef<int>>::make(const int *Data,
+ size_t N) {
+ return ArrayRef<int>(Data, Data + N);
+}
+
+template <>
+std::vector<int> RadixTreeTypeTest<std::vector<int>>::make(const int *Data,
+ size_t N) {
+ return std::vector<int>(Data, Data + N);
+}
+
+template <>
+std::list<int> RadixTreeTypeTest<std::list<int>>::make(const int *Data,
+ size_t N) {
+ return std::list<int>(Data, Data + N);
+}
+
+class TypeNameGenerator {
+public:
+ template <typename T> static std::string GetName(int) {
+ if (std::is_same_v<T, StringRef>)
+ return "StringRef";
+ if (std::is_same_v<T, std::string>)
+ return "string";
+ if (std::is_same_v<T, iterator_range<StringRef::const_iterator>>)
+ return "iterator_range";
+ if (std::is_same_v<T, iterator_range<StringRef::const_reverse_iterator>>)
+ return "reverse_iterator_range";
+ if (std::is_same_v<T, ArrayRef<int>>)
+ return "ArrayRef";
+ if (std::is_same_v<T, std::vector<int>>)
+ return "vector";
+ if (std::is_same_v<T, std::list<int>>)
+ return "list";
+ return "Unknown";
+ }
+};
+
+using TestTypes =
+ ::testing::Types<StringRef, std::string,
+ iterator_range<StringRef::const_iterator>,
+ iterator_range<StringRef::const_reverse_iterator>,
+ ArrayRef<int>, std::vector<int>, std::list<int>>;
+
+TYPED_TEST_SUITE(RadixTreeTypeTest, TestTypes, TypeNameGenerator);
+
+TYPED_TEST(RadixTreeTypeTest, Helpers) {
+ for (size_t i = 0; i < 9; ++i) {
+ auto R1 = this->make1(i);
+ auto R2 = this->make2(i);
+ EXPECT_EQ(i, llvm::range_size(R1));
+ EXPECT_EQ(i, llvm::range_size(R2));
+ auto [I1, I2] = llvm::mismatch(R1, R2);
+ // Exactly 2 first elements of Data1 and Data2 must match.
+ EXPECT_EQ(std::min<int>(2, i), std::distance(R1.begin(), I1));
+ }
+}
+
+TYPED_TEST(RadixTreeTypeTest, Empty) {
+ RadixTree<TypeParam, int> T;
+ EXPECT_TRUE(T.empty());
+ EXPECT_EQ(0u, T.size());
+
+ EXPECT_TRUE(T.find_prefixes(this->make1(0)).empty());
+ EXPECT_TRUE(T.find_prefixes(this->make2(1)).empty());
+
+ EXPECT_EQ(1u, T.countNodes());
+}
+
+TYPED_TEST(RadixTreeTypeTest, InsertEmpty) {
+ using TreeType = RadixTree<TypeParam, int>;
+ TreeType T;
+ auto [It, IsNew] = T.emplace(this->make1(0), 5);
+ EXPECT_TRUE(!T.empty());
+ EXPECT_EQ(1u, T.size());
+ EXPECT_TRUE(IsNew);
+ const auto &[K, V] = *It;
+ EXPECT_TRUE(K.empty());
+ EXPECT_EQ(5, V);
+
+ EXPECT_THAT(T.find_prefixes(this->make1(0)),
+ ElementsAre(Pair(ElementsAre(), 5)));
+
+ EXPECT_THAT(T.find_prefixes(this->make2(1)),
+ ElementsAre(Pair(ElementsAre(), 5)));
+
+ EXPECT_THAT(T, ElementsAre(Pair(ElementsAre(), 5)));
+
+ EXPECT_EQ(1u, T.countNodes());
+}
+
+TYPED_TEST(RadixTreeTypeTest, InsertEmptyTwice) {
+ using TreeType = RadixTree<TypeParam, int>;
+ TreeType T;
+ T.emplace(this->make1(0), 5);
+ auto [It, IsNew] = T.emplace(this->make1(0), 6);
+ EXPECT_TRUE(!T.empty());
+ EXPECT_EQ(1u, T.size());
+ EXPECT_TRUE(!IsNew);
+ const auto &[K, V] = *It;
+ EXPECT_TRUE(K.empty());
+ EXPECT_EQ(5, V);
+
+ EXPECT_THAT(T.find_prefixes(this->make1(0)),
+ ElementsAre(Pair(ElementsAre(), 5)));
+
+ EXPECT_THAT(T.find_prefixes(this->make2(1)),
+ ElementsAre(Pair(ElementsAre(), 5)));
+
+ EXPECT_THAT(T, ElementsAre(Pair(ElementsAre(), 5)));
+
+ EXPECT_EQ(1u, T.countNodes());
+}
+
+TYPED_TEST(RadixTreeTypeTest, InsertOne) {
+ using TreeType = RadixTree<TypeParam, int>;
+ TreeType T;
+ auto [It, IsNew] = T.emplace(this->make1(1), 4);
+ EXPECT_TRUE(!T.empty());
+ EXPECT_EQ(1u, T.size());
+ EXPECT_TRUE(IsNew);
+ const auto &[K, V] = *It;
+ EXPECT_THAT(K, ElementsAreArray(this->make1(1)));
+ EXPECT_EQ(4, V);
+
+ EXPECT_THAT(T, ElementsAre(Pair(ElementsAreArray(this->make1(1)), 4)));
+
+ EXPECT_THAT(T.find_prefixes(this->make1(1)),
+ ElementsAre(Pair(ElementsAreArray(this->make1(1)), 4)));
+
+ EXPECT_THAT(T.find_prefixes(this->make1(2)),
+ ElementsAre(Pair(ElementsAreArray(this->make1(1)), 4)));
+
+ EXPECT_EQ(2u, T.countNodes());
+}
+
+TYPED_TEST(RadixTreeTypeTest, InsertOneTwice) {
+ using TreeType = RadixTree<TypeParam, int>;
+ TreeType T;
+ T.emplace(this->make1(1), 4);
+ auto [It, IsNew] = T.emplace(this->make1(1), 4);
+ EXPECT_TRUE(!T.empty());
+ EXPECT_EQ(1u, T.size());
+ EXPECT_TRUE(!IsNew);
+
+ EXPECT_THAT(T, ElementsAre(Pair(ElementsAreArray(this->make1(1)), 4)));
+ EXPECT_EQ(2u, T.countNodes());
+}
+
+TYPED_TEST(RadixTreeTypeTest, InsertSuperStrings) {
+ using TreeType = RadixTree<TypeParam, int>;
+ TreeType T;
+
+ for (size_t Len = 0; Len < 7; Len += 2) {
+ auto [It, IsNew] = T.emplace(this->make1(Len), Len);
+ EXPECT_TRUE(IsNew);
+ }
+
+ EXPECT_THAT(T,
+ UnorderedElementsAre(Pair(ElementsAreArray(this->make1(0)), 0),
+ Pair(ElementsAreArray(this->make1(2)), 2),
+ Pair(ElementsAreArray(this->make1(4)), 4),
+ Pair(ElementsAreArray(this->make1(6)), 6)));
+
+ EXPECT_THAT(T.find_prefixes(this->make1(0)),
+ UnorderedElementsAre(Pair(ElementsAreArray(this->make1(0)), 0)));
+
+ EXPECT_THAT(T.find_prefixes(this->make1(3)),
+ UnorderedElementsAre(Pair(ElementsAreArray(this->make1(0)), 0),
+ Pair(ElementsAreArray(this->make1(2)), 2)));
+
+ EXPECT_THAT(T.find_prefixes(this->make1(7)),
+ UnorderedElementsAre(Pair(ElementsAreArray(this->make1(0)), 0),
+ Pair(ElementsAreArray(this->make1(2)), 2),
+ Pair(ElementsAreArray(this->make1(4)), 4),
+ Pair(ElementsAreArray(this->make1(6)), 6)));
+
+ EXPECT_EQ(4u, T.countNodes());
+}
+
+TYPED_TEST(RadixTreeTypeTest, InsertSubStrings) {
+ using TreeType = RadixTree<TypeParam, int>;
+ TreeType T;
+
+ for (size_t Len = 0; Len < 7; Len += 2) {
+ auto [It, IsNew] = T.emplace(this->make1(7 - Len), 7 - Len);
+ EXPECT_TRUE(IsNew);
+ }
+
+ EXPECT_THAT(T,
+ UnorderedElementsAre(Pair(ElementsAreArray(this->make1(1)), 1),
+ Pair(ElementsAreArray(this->make1(3)), 3),
+ Pair(ElementsAreArray(this->make1(5)), 5),
+ Pair(ElementsAreArray(this->make1(7)), 7)));
+
+ EXPECT_THAT(T.find_prefixes(this->make1(0)), UnorderedElementsAre());
+
+ EXPECT_THAT(T.find_prefixes(this->make1(3)),
+ UnorderedElementsAre(Pair(ElementsAreArray(this->make1(1)), 1),
+ Pair(ElementsAreArray(this->make1(3)), 3)));
+
+ EXPECT_THAT(T.find_prefixes(this->make1(6)),
+ UnorderedElementsAre(Pair(ElementsAreArray(this->make1(1)), 1),
+ Pair(ElementsAreArray(this->make1(3)), 3),
+ Pair(ElementsAreArray(this->make1(5)), 5)));
+
+ EXPECT_EQ(5u, T.countNodes());
+}
+
+TYPED_TEST(RadixTreeTypeTest, InsertVShape) {
+ using TreeType = RadixTree<TypeParam, int>;
+ TreeType T;
+
+ EXPECT_EQ(1u, T.countNodes());
+ T.emplace(this->make1(5), 15);
+ EXPECT_EQ(2u, T.countNodes());
+ T.emplace(this->make2(6), 26);
+ EXPECT_EQ(4u, T.countNodes());
+ T.emplace(this->make2(1), 21);
+ EXPECT_EQ(5u, T.countNodes());
+
+ EXPECT_THAT(T,
+ UnorderedElementsAre(Pair(ElementsAreArray(this->make1(5)), 15),
+ Pair(ElementsAreArray(this->make2(6)), 26),
+ Pair(ElementsAreArray(this->make2(1)), 21)));
+
+ EXPECT_THAT(T.find_prefixes(this->make1(7)),
+ UnorderedElementsAre(Pair(ElementsAreArray(this->make2(1)), 21),
+ Pair(ElementsAreArray(this->make1(5)), 15)));
+
+ EXPECT_THAT(T.find_prefixes(this->make2(7)),
+ UnorderedElementsAre(Pair(ElementsAreArray(this->make2(1)), 21),
+ Pair(ElementsAreArray(this->make2(6)), 26)));
+
+ EXPECT_EQ(5u, T.countNodes());
+}
+
+} // namespace
>From a3e5a1347f1ebe4a42fa90acc757d467caf40466 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Thu, 23 Oct 2025 00:32:35 -0700
Subject: [PATCH 2/5] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
=?UTF-8?q?anges=20introduced=20through=20rebase?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.7
[skip ci]
---
llvm/docs/ProgrammersManual.rst | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 9cdac9c59fa9b..26865e4f671d7 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -2161,6 +2161,16 @@ that are not simple pointers (use :ref:`SmallPtrSet <dss_smallptrset>` for
pointers). Note that ``DenseSet`` has the same requirements for the value type that
:ref:`DenseMap <dss_densemap>` has.
+.. _dss_radixtree:
+
+llvm/ADT/RadixTree.h
+^^^^^^^^^^^^^^^^^^^^
+
+``RadixTree`` is a trie-based data structure that stores range like keys and
+their associated values. It is particularly efficient for storing keys that
+share common prefixes, as it can compress these prefixes to save memory. It
+supports efficient search of matching prefixes.
+
.. _dss_sparseset:
llvm/ADT/SparseSet.h
>From 7fbe75b0b3c6af24fee60cb755c2be0fb7b28794 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Thu, 23 Oct 2025 23:20:07 -0700
Subject: [PATCH 3/5] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
=?UTF-8?q?anges=20introduced=20through=20rebase?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.7
[skip ci]
---
llvm/unittests/ADT/RadixTreeTest.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/unittests/ADT/RadixTreeTest.cpp b/llvm/unittests/ADT/RadixTreeTest.cpp
index c9fd44e3497ff..2c92e9d939852 100644
--- a/llvm/unittests/ADT/RadixTreeTest.cpp
+++ b/llvm/unittests/ADT/RadixTreeTest.cpp
@@ -1,4 +1,4 @@
-//===- llvm/unittest/ADT/RadixTreeTypeTest.cpp ------------------------===//
+//===- llvm/unittest/ADT/RadixTreeTest.cpp --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
>From d9944aaf92fcee109368c8cb07129f0bbfc09ff5 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Fri, 24 Oct 2025 22:02:39 -0700
Subject: [PATCH 4/5] Update llvm/lib/Support/SpecialCaseList.cpp
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
llvm/lib/Support/SpecialCaseList.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 567cc9421020c..041626876d1fb 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -104,7 +104,7 @@ void SpecialCaseList::GlobMatcher::match(
if (!PrefixToGlob.empty()) {
for (const auto &[_, V] : PrefixToGlob.find_prefixes(Query)) {
for (const auto *G : V) {
- // Each value of the map is vector of globs sorted as from best to
+ // Each value of the map is a vector of globs sorted as from best to
// worst.
if (G->Pattern.match(Query)) {
Cb(G->Name, G->LineNo);
>From 40cdcee733c5f7be511c9240ce8b4762b15e8ad7 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Fri, 24 Oct 2025 22:02:49 -0700
Subject: [PATCH 5/5] Update llvm/lib/Support/SpecialCaseList.cpp
Co-authored-by: Copilot <175728472+Copilot at users.noreply.github.com>
---
llvm/lib/Support/SpecialCaseList.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 041626876d1fb..642ec34ce43e1 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -108,7 +108,7 @@ void SpecialCaseList::GlobMatcher::match(
// worst.
if (G->Pattern.match(Query)) {
Cb(G->Name, G->LineNo);
- // As soon as we find match in the vector we can break for the vector,
+ // As soon as we find a match in the vector we can break for the vector,
// but we still need to continue for other values in the map, as they
// may contain a better match.
break;
More information about the llvm-commits
mailing list