[libc-commits] [libc] [libc][tsearch] use weak AVL tree for tsearch implementation (PR #172411)
Schrodinger ZHU Yifan via libc-commits
libc-commits at lists.llvm.org
Tue Dec 16 04:49:17 PST 2025
https://github.com/SchrodingerZhu updated https://github.com/llvm/llvm-project/pull/172411
>From 1588de4b6f0da428155f4175f23043c292fb6575 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Mon, 15 Dec 2025 03:09:11 -0500
Subject: [PATCH 01/13] [libc][tsearch] scaffold WAVL header
---
libc/src/__support/CMakeLists.txt | 9 +++++++++
libc/src/__support/weak_avl.h | 19 +++++++++++++++++++
2 files changed, 28 insertions(+)
create mode 100644 libc/src/__support/weak_avl.h
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index c7f127d6934a0..99a1641d629f3 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -391,6 +391,15 @@ add_header_library(
libc.src.__support.macros.attributes
)
+add_header_library(
+ weak_avl
+ HDRS
+ weak_avl.h
+ DEPENDS
+ libc.src.__support.CPP.new
+ libc.src.__support.macros.config
+)
+
add_subdirectory(FPUtil)
add_subdirectory(OSUtil)
add_subdirectory(StringUtil)
diff --git a/libc/src/__support/weak_avl.h b/libc/src/__support/weak_avl.h
new file mode 100644
index 0000000000000..817146e8b9ab9
--- /dev/null
+++ b/libc/src/__support/weak_avl.h
@@ -0,0 +1,19 @@
+//===-- Implementation header for weak AVL tree -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_WEAK_AVL_H
+#define LLVM_LIBC_SRC___SUPPORT_WEAK_AVL_H
+
+#include "src/__support/CPP/new.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_WEAK_AVL_H
>From c58c5373397e2c3353e934edbe8a51d0ee5c80be Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Mon, 15 Dec 2025 16:42:22 -0500
Subject: [PATCH 02/13] stage
---
libc/src/__support/weak_avl.h | 97 +++++++++++++++++++++++++++++++++++
1 file changed, 97 insertions(+)
diff --git a/libc/src/__support/weak_avl.h b/libc/src/__support/weak_avl.h
index 817146e8b9ab9..552f6bf4e47be 100644
--- a/libc/src/__support/weak_avl.h
+++ b/libc/src/__support/weak_avl.h
@@ -5,15 +5,112 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+// Weak AVL tree implementation based on the algorithm described in:
+// 1. https://maskray.me/blog/2025-12-14-weak-avl-tree
+// 2. https://reviews.freebsd.org/D25480
+//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC___SUPPORT_WEAK_AVL_H
#define LLVM_LIBC_SRC___SUPPORT_WEAK_AVL_H
+#include "hdr/stdint_proxy.h"
+#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/new.h"
+#include "src/__support/CPP/utility/move.h"
+#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE_DECL {
+namespace wavl {
+struct WeakAVLDefaultCompare {
+ template <typename T>
+ LIBC_INLINE bool operator()(const T &a, const T &b) const {
+ return a < b;
+ }
+};
+
+template <typename T, typename Compare = WeakAVLDefaultCompare>
+class WeakAVLNode {
+ // Data
+ T data;
+
+ // Packs the parent pointer with 2 flag bits in the low bits. Bit 0 indicates
+ // whether the left child has rank difference 2; bit 1 indicates whether the
+ // right child has rank difference 2. A cleared bit means rank difference 1.
+ uintptr_t parent_and_flags;
+
+ // Children pointers
+ WeakAVLNode *children[2];
+
+ // Constants
+ static LIBC_INLINE_VAR constexpr uintptr_t FLAGS_MASK = 0b11;
+ static LIBC_INLINE_VAR constexpr uintptr_t LEFT_FLAG_BIT = 0b01;
+ static LIBC_INLINE_VAR constexpr uintptr_t RIGHT_FLAG_BIT = 0b10;
+ static_assert(alignof(WeakAVLNode) >= 4,
+ "WeakAVLNode alignment must be at least 4 to store flags.");
+
+ // Auxiliary methods for accessing fields
+ LIBC_INLINE WeakAVLNode *parent() const {
+ return cpp::bit_cast<WeakAVLNode *>(parent_and_flags & ~FLAGS_MASK);
+ }
+ LIBC_INLINE uintptr_t flags() const { return parent_and_flags & FLAGS_MASK; }
+ LIBC_INLINE void set_parent(WeakAVLNode *p) {
+ parent_and_flags = cpp::bit_cast<uintptr_t>(p) | flags();
+ }
+ LIBC_INLINE bool has_rank_diff_2(bool is_right) const {
+ return flags() & (is_right ? RIGHT_FLAG_BIT : LEFT_FLAG_BIT);
+ }
+ LIBC_INLINE void toggle_rank_diff_2(bool is_right) {
+ parent_and_flags ^= (is_right ? RIGHT_FLAG_BIT : LEFT_FLAG_BIT);
+ }
+ LIBC_INLINE void clear_flags() { parent_and_flags &= ~FLAGS_MASK; }
+ LIBC_INLINE bool operator<(const WeakAVLNode &other) const {
+ return Compare{}(data, other.data);
+ }
+
+ LIBC_INLINE WeakAVLNode(T data)
+ : data(cpp::move(data)), parent_and_flags(0), children{nullptr, nullptr} {
+ }
+
+public:
+ LIBC_INLINE static WeakAVLNode *create(T value) {
+ AllocChecker ac;
+ WeakAVLNode *res = ::new (ac) WeakAVLNode(value);
+ if (ac)
+ return res;
+ return nullptr;
+ }
+ LIBC_INLINE static void destroy(WeakAVLNode *node) {
+ if (!node)
+ return;
+ destroy(node->children[0]);
+ destroy(node->children[1]);
+ ::delete node;
+ }
+ LIBC_INLINE static WeakAVLNode *rotate(WeakAVLNode *&root, WeakAVLNode *node,
+ bool is_right) {
+ WeakAVLNode *pivot = node->children[is_right];
+ // Handover pivot's child
+ WeakAVLNode *grandchild = pivot->children[!is_right];
+ node->children[is_right] = grandchild;
+ if (grandchild)
+ grandchild->set_parent(node);
+ pivot->set_parent(node->parent());
+ // Pivot becomes the new root of the subtree
+ if (!node->parent())
+ root = pivot;
+ else {
+ bool node_is_right = node->parent()->children[1] == node;
+ node->parent()->children[node_is_right] = pivot;
+ }
+ pivot->children[!is_right] = node;
+ node->set_parent(pivot);
+ return pivot;
+ }
+};
+} // namespace wavl
+
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC___SUPPORT_WEAK_AVL_H
>From 1b6a1d6f689144c41d915dbd8ec874f4adc9300c Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Mon, 15 Dec 2025 17:01:06 -0500
Subject: [PATCH 03/13] stage
---
libc/src/__support/weak_avl.h | 3 +++
1 file changed, 3 insertions(+)
diff --git a/libc/src/__support/weak_avl.h b/libc/src/__support/weak_avl.h
index 552f6bf4e47be..965dc69e4c530 100644
--- a/libc/src/__support/weak_avl.h
+++ b/libc/src/__support/weak_avl.h
@@ -108,6 +108,9 @@ class WeakAVLNode {
node->set_parent(pivot);
return pivot;
}
+ LIBC_INLINE static void insert(WeakAVLNode*& root, WeakAVLNode* node) {
+
+ }
};
} // namespace wavl
>From e3b68949d5dfbf18b3097cbe78421f16b73e23eb Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Mon, 15 Dec 2025 22:52:14 -0500
Subject: [PATCH 04/13] stage the work
---
libc/src/__support/CMakeLists.txt | 5 +
libc/src/__support/weak_avl.h | 172 +++++++++++++++++++---
libc/test/src/__support/CMakeLists.txt | 10 ++
libc/test/src/__support/weak_avl_test.cpp | 128 ++++++++++++++++
4 files changed, 296 insertions(+), 19 deletions(-)
create mode 100644 libc/test/src/__support/weak_avl_test.cpp
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 99a1641d629f3..bde12abaa291e 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -396,7 +396,12 @@ add_header_library(
HDRS
weak_avl.h
DEPENDS
+ libc.hdr.stdint_proxy
+ libc.src.__support.CPP.bit
libc.src.__support.CPP.new
+ libc.src.__support.CPP.utility
+ libc.src.__support.libc_assert
+ libc.src.__support.macros.attributes
libc.src.__support.macros.config
)
diff --git a/libc/src/__support/weak_avl.h b/libc/src/__support/weak_avl.h
index 965dc69e4c530..aa38199cc1bfa 100644
--- a/libc/src/__support/weak_avl.h
+++ b/libc/src/__support/weak_avl.h
@@ -17,27 +17,20 @@
#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/new.h"
#include "src/__support/CPP/utility/move.h"
+#include "src/__support/libc_assert.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
namespace LIBC_NAMESPACE_DECL {
-namespace wavl {
-struct WeakAVLDefaultCompare {
- template <typename T>
- LIBC_INLINE bool operator()(const T &a, const T &b) const {
- return a < b;
- }
-};
-
-template <typename T, typename Compare = WeakAVLDefaultCompare>
-class WeakAVLNode {
+template <typename T> class WeakAVLNode {
// Data
T data;
// Packs the parent pointer with 2 flag bits in the low bits. Bit 0 indicates
// whether the left child has rank difference 2; bit 1 indicates whether the
// right child has rank difference 2. A cleared bit means rank difference 1.
+ // All rank differences are 1 or 2, and every leaf has rank 0.
uintptr_t parent_and_flags;
// Children pointers
@@ -47,8 +40,6 @@ class WeakAVLNode {
static LIBC_INLINE_VAR constexpr uintptr_t FLAGS_MASK = 0b11;
static LIBC_INLINE_VAR constexpr uintptr_t LEFT_FLAG_BIT = 0b01;
static LIBC_INLINE_VAR constexpr uintptr_t RIGHT_FLAG_BIT = 0b10;
- static_assert(alignof(WeakAVLNode) >= 4,
- "WeakAVLNode alignment must be at least 4 to store flags.");
// Auxiliary methods for accessing fields
LIBC_INLINE WeakAVLNode *parent() const {
@@ -65,15 +56,13 @@ class WeakAVLNode {
parent_and_flags ^= (is_right ? RIGHT_FLAG_BIT : LEFT_FLAG_BIT);
}
LIBC_INLINE void clear_flags() { parent_and_flags &= ~FLAGS_MASK; }
- LIBC_INLINE bool operator<(const WeakAVLNode &other) const {
- return Compare{}(data, other.data);
+ LIBC_INLINE void set_flags(uintptr_t flags) {
+ parent_and_flags |= (FLAGS_MASK & flags);
}
-
LIBC_INLINE WeakAVLNode(T data)
: data(cpp::move(data)), parent_and_flags(0), children{nullptr, nullptr} {
}
-public:
LIBC_INLINE static WeakAVLNode *create(T value) {
AllocChecker ac;
WeakAVLNode *res = ::new (ac) WeakAVLNode(value);
@@ -81,6 +70,16 @@ class WeakAVLNode {
return res;
return nullptr;
}
+
+public:
+ LIBC_INLINE const WeakAVLNode *get_left() const { return children[0]; }
+ LIBC_INLINE const WeakAVLNode *get_right() const { return children[1]; }
+ LIBC_INLINE const T &get_data() const { return data; }
+ LIBC_INLINE bool is_rank_diff_2(bool is_right) const {
+ return has_rank_diff_2(is_right);
+ }
+
+ // Destroy the subtree rooted at node
LIBC_INLINE static void destroy(WeakAVLNode *node) {
if (!node)
return;
@@ -88,6 +87,16 @@ class WeakAVLNode {
destroy(node->children[1]);
::delete node;
}
+ // Rotate the subtree rooted at node in the given direction.
+ //
+ // Illustration for is_right = true (Left Rotation):
+ //
+ // (Node) (Pivot)
+ // / \ / \
+ // A (Pivot) => (Node) C
+ // / \ / \
+ // B C A B
+ //
LIBC_INLINE static WeakAVLNode *rotate(WeakAVLNode *&root, WeakAVLNode *node,
bool is_right) {
WeakAVLNode *pivot = node->children[is_right];
@@ -108,11 +117,136 @@ class WeakAVLNode {
node->set_parent(pivot);
return pivot;
}
- LIBC_INLINE static void insert(WeakAVLNode*& root, WeakAVLNode* node) {
-
+ // Insert data into the subtree rooted at root.
+ // Returns the node if insertion is successful or the node exists in
+ // the tree.
+ // Returns nullptr if memory allocation fails.
+ // `Compare` returns integer values for ternary comparison.
+ template <typename Compare>
+ LIBC_INLINE static WeakAVLNode *find_or_insert(WeakAVLNode *&root, T data,
+ Compare &&comp) {
+ WeakAVLNode *parent = nullptr, *cursor = root;
+ bool is_right = false;
+ while (cursor != nullptr) {
+ parent = cursor;
+ int comp_result = comp(parent->data, data);
+ if (comp_result == 0)
+ return parent; // Node already exists
+ is_right = comp_result < 0;
+ cursor = cursor->children[is_right];
+ }
+ WeakAVLNode *allocated = create(cpp::move(data));
+ if (!allocated)
+ return nullptr;
+ WeakAVLNode *node = allocated;
+ node->set_parent(parent);
+
+ // Case 0: inserting into an empty tree
+ if (!parent) {
+ root = node; // Tree was empty
+ return node;
+ }
+
+ parent->children[is_right] = node;
+ // Rebalance process
+ while (parent) {
+ // Case 1: parent does not need to be promoted as node is lowering
+ // than the parent by 2 ranks.
+ // (P) (P)
+ // / \ / \
+ // 2 1 => 1 1
+ // / \ / \
+ // (N) (*) (N) (*)
+ if (parent->has_rank_diff_2(is_right)) {
+ parent->toggle_rank_diff_2(is_right);
+ break;
+ }
+
+ bool sibling_has_rank_diff_2 = parent->has_rank_diff_2(!is_right);
+ // Case 2: node's sibling has rank-difference 1.
+ // Promoting parent will fix the conflict of the trinodes but we may need
+ // to continue on parent.
+ //
+ // (GP) (GP)
+ // | Promote | x - 1
+ // | x -----> (P)
+ // 0 | / 1 / \
+ // (N) --- (P) ---- (N) \ 2
+ // \ 1 \
+ // (S) (S)
+ if (!sibling_has_rank_diff_2) {
+ parent->toggle_rank_diff_2(!is_right);
+ node = parent;
+ parent = node->parent();
+ continue;
+ }
+
+ LIBC_ASSERT((node->flags() != 0b11) &&
+ "there should be no 2-2 nodes in a weak AVL tree");
+
+ LIBC_ASSERT((node == allocated || node->flags() != 0) &&
+ "Internal node must have a child with rank-difference 2, "
+ "otherwise it should have already been handled.");
+
+ bool node_is_right = (node == parent->children[1]);
+ // Case 3: node's sibling has rank-difference 2. And node has a 1-node
+ // along the same direction. We can do a single rotation to fix the
+ // trinode.
+ // (GP) (GP)
+ // 0 | X Rotate |
+ // (N) ----- (P) => (N)
+ // 1 / \ 2 \ 2 1 / \ 1
+ // (C1) \ \ (C1) (P)
+ // (C2) (S) 1 / \ 1
+ // (C2) (S)
+ if (node->has_rank_diff_2(!node_is_right)) {
+ WeakAVLNode *new_subroot = rotate(root, parent, node_is_right);
+ new_subroot->clear_flags();
+ parent->clear_flags();
+ break;
+ }
+ // Case 4: node's sibling has rank-difference 2. And node has a 1-node
+ // along the opposite direction. We need a double rotation to fix the
+ // trinode.
+ // (GP) (GP)
+ // 0 | X Zig-Zag | X
+ // (N) ----- (P) => (C1)
+ // 2 / \ 1 \ 2 1 / \ 1
+ // / (C1) \ (N) (P)
+ // (C2) L / \ R (S) 1 / \ L R / \ 1
+ // (A) (B) (C2) (A)(B) (S)
+ // (mirrored)
+ // (GP) (GP)
+ // X | 0 Zig-Zag | X
+ // (P) ----- (N) => (C1)
+ // 2 / 1 / \ 2 1 / \ 1
+ // / (C1) \ (P) (N)
+ // (S) L / \ R (C2) 1 / \ L R / \ 1
+ // (A) (B) (S)(A) (B)(C2)
+ WeakAVLNode *subroot1 =
+ rotate(root, node, !node_is_right); // First rotation
+ [[maybe_unused]] WeakAVLNode *subroot2 =
+ rotate(root, parent, node_is_right); // Second rotation
+ LIBC_ASSERT(subroot1 == subroot2 &&
+ "Subroots after double rotation should be the same");
+ uintptr_t flags = subroot1->flags();
+ node->clear_flags();
+ parent->clear_flags();
+ subroot1->clear_flags();
+ // Select destinations
+ WeakAVLNode *dst_left = node_is_right ? parent : node;
+ WeakAVLNode *dst_right = node_is_right ? node : parent;
+ // Masked toggles
+ if (flags & LEFT_FLAG_BIT)
+ dst_left->toggle_rank_diff_2(true);
+
+ if (flags & RIGHT_FLAG_BIT)
+ dst_right->toggle_rank_diff_2(false);
+ break;
+ }
+ return allocated;
}
};
-} // namespace wavl
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 138866b4cc869..97ee42347c9f3 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -280,6 +280,16 @@ add_libc_test(
libc.src.__support.CPP.bit
)
+add_libc_test(
+ weak_avl_test
+ SUITE
+ libc-support-tests
+ SRCS
+ weak_avl_test.cpp
+ DEPENDS
+ libc.src.__support.weak_avl
+)
+
add_subdirectory(CPP)
add_subdirectory(File)
add_subdirectory(RPC)
diff --git a/libc/test/src/__support/weak_avl_test.cpp b/libc/test/src/__support/weak_avl_test.cpp
new file mode 100644
index 0000000000000..daddc047ab9f0
--- /dev/null
+++ b/libc/test/src/__support/weak_avl_test.cpp
@@ -0,0 +1,128 @@
+//===-- Unittests for WeakAVL ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/weak_avl.h"
+#include "test/UnitTest/Test.h"
+
+using Node = LIBC_NAMESPACE::WeakAVLNode<int>;
+
+namespace {
+
+// Validate weak-AVL rank-difference invariant:
+// no node may have rank-difference 2 on both sides.
+bool validate(const Node *node) {
+ if (!node)
+ return true;
+ bool left_2 = node->is_rank_diff_2(false);
+ bool right_2 = node->is_rank_diff_2(true);
+ return (!left_2 || !right_2) && validate(node->get_left()) &&
+ validate(node->get_right());
+}
+
+// Insert according to pattern `next(i)`
+using NextFn = int (*)(int);
+
+static Node *build_tree(NextFn next, int N, int (*compare)(int, int)) {
+ Node *root = nullptr;
+ for (int i = 0; i < N; ++i)
+ Node::find_or_insert(root, next(i), compare);
+ return root;
+}
+
+// Insertion patterns
+
+static int seq(int i) { return i; }
+
+static int rev(int i) {
+ constexpr int N = 1000;
+ return N - 1 - i;
+}
+
+// Coprime stride permutation: i -> (i * X) % N
+static int stride(int i) {
+ constexpr int N = 1000;
+ constexpr int X = 7919; // gcd(X, N) = 1
+ return (i * X) % N;
+}
+
+} // namespace
+
+TEST(LlvmLibcWeakAVLTest, SimpleInsertion) {
+ Node *root = nullptr;
+ auto compare = [](int a, int b) { return a - b; };
+
+ Node *node10 = Node::find_or_insert(root, 10, compare);
+ ASSERT_TRUE(node10 != nullptr);
+ ASSERT_EQ(root, node10);
+ ASSERT_TRUE(validate(root));
+
+ Node *node5 = Node::find_or_insert(root, 5, compare);
+ ASSERT_TRUE(node5 != nullptr);
+ ASSERT_TRUE(validate(root));
+
+ Node *node15 = Node::find_or_insert(root, 15, compare);
+ ASSERT_TRUE(node15 != nullptr);
+ ASSERT_TRUE(validate(root));
+
+ Node *node10_again = Node::find_or_insert(root, 10, compare);
+ ASSERT_EQ(node10, node10_again);
+ ASSERT_TRUE(validate(root));
+
+ Node::destroy(root);
+}
+
+TEST(LlvmLibcWeakAVLTest, SequentialInsertion) {
+ auto compare = [](int a, int b) { return a - b; };
+ constexpr int N = 1000;
+
+ Node *root = build_tree(seq, N, compare);
+ ASSERT_TRUE(validate(root));
+
+ for (int i = 0; i < N; ++i) {
+ Node *node = Node::find_or_insert(root, i, compare);
+ ASSERT_TRUE(node != nullptr);
+ ASSERT_EQ(node->get_data(), i);
+ }
+
+ ASSERT_TRUE(validate(root));
+ Node::destroy(root);
+}
+
+TEST(LlvmLibcWeakAVLTest, ReversedInsertion) {
+ auto compare = [](int a, int b) { return a - b; };
+ constexpr int N = 1000;
+
+ Node *root = build_tree(rev, N, compare);
+ ASSERT_TRUE(validate(root));
+
+ for (int i = 0; i < N; ++i) {
+ Node *node = Node::find_or_insert(root, i, compare);
+ ASSERT_TRUE(node != nullptr);
+ ASSERT_EQ(node->get_data(), i);
+ }
+
+ ASSERT_TRUE(validate(root));
+ Node::destroy(root);
+}
+
+TEST(LlvmLibcWeakAVLTest, StridedInsertion) {
+ auto compare = [](int a, int b) { return a - b; };
+ constexpr int N = 1000;
+
+ Node *root = build_tree(stride, N, compare);
+ ASSERT_TRUE(validate(root));
+
+ for (int i = 0; i < N; ++i) {
+ Node *node = Node::find_or_insert(root, i, compare);
+ ASSERT_TRUE(node != nullptr);
+ ASSERT_EQ(node->get_data(), i);
+ }
+
+ ASSERT_TRUE(validate(root));
+ Node::destroy(root);
+}
>From 5640d0fb67d07611a2ef98eb3413c794e9b6a35a Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Mon, 15 Dec 2025 23:04:49 -0500
Subject: [PATCH 05/13] stage the work
---
libc/src/__support/weak_avl.h | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/libc/src/__support/weak_avl.h b/libc/src/__support/weak_avl.h
index aa38199cc1bfa..492e8df0e32d9 100644
--- a/libc/src/__support/weak_avl.h
+++ b/libc/src/__support/weak_avl.h
@@ -150,6 +150,7 @@ template <typename T> class WeakAVLNode {
parent->children[is_right] = node;
// Rebalance process
while (parent) {
+ is_right = (parent->children[1] == node);
// Case 1: parent does not need to be promoted as node is lowering
// than the parent by 2 ranks.
// (P) (P)
@@ -188,7 +189,6 @@ template <typename T> class WeakAVLNode {
"Internal node must have a child with rank-difference 2, "
"otherwise it should have already been handled.");
- bool node_is_right = (node == parent->children[1]);
// Case 3: node's sibling has rank-difference 2. And node has a 1-node
// along the same direction. We can do a single rotation to fix the
// trinode.
@@ -199,8 +199,8 @@ template <typename T> class WeakAVLNode {
// (C1) \ \ (C1) (P)
// (C2) (S) 1 / \ 1
// (C2) (S)
- if (node->has_rank_diff_2(!node_is_right)) {
- WeakAVLNode *new_subroot = rotate(root, parent, node_is_right);
+ if (node->has_rank_diff_2(!is_right)) {
+ WeakAVLNode *new_subroot = rotate(root, parent, is_right);
new_subroot->clear_flags();
parent->clear_flags();
break;
@@ -223,10 +223,9 @@ template <typename T> class WeakAVLNode {
// / (C1) \ (P) (N)
// (S) L / \ R (C2) 1 / \ L R / \ 1
// (A) (B) (S)(A) (B)(C2)
- WeakAVLNode *subroot1 =
- rotate(root, node, !node_is_right); // First rotation
+ WeakAVLNode *subroot1 = rotate(root, node, !is_right); // First rotation
[[maybe_unused]] WeakAVLNode *subroot2 =
- rotate(root, parent, node_is_right); // Second rotation
+ rotate(root, parent, is_right); // Second rotation
LIBC_ASSERT(subroot1 == subroot2 &&
"Subroots after double rotation should be the same");
uintptr_t flags = subroot1->flags();
@@ -234,8 +233,8 @@ template <typename T> class WeakAVLNode {
parent->clear_flags();
subroot1->clear_flags();
// Select destinations
- WeakAVLNode *dst_left = node_is_right ? parent : node;
- WeakAVLNode *dst_right = node_is_right ? node : parent;
+ WeakAVLNode *dst_left = is_right ? parent : node;
+ WeakAVLNode *dst_right = is_right ? node : parent;
// Masked toggles
if (flags & LEFT_FLAG_BIT)
dst_left->toggle_rank_diff_2(true);
>From 4b58c2bf2a2a5b008c262f4093d7ccf497374550 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Tue, 16 Dec 2025 01:01:33 -0500
Subject: [PATCH 06/13] update ref
---
libc/src/__support/weak_avl.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/libc/src/__support/weak_avl.h b/libc/src/__support/weak_avl.h
index 492e8df0e32d9..7ce862d6d78b3 100644
--- a/libc/src/__support/weak_avl.h
+++ b/libc/src/__support/weak_avl.h
@@ -8,6 +8,7 @@
// Weak AVL tree implementation based on the algorithm described in:
// 1. https://maskray.me/blog/2025-12-14-weak-avl-tree
// 2. https://reviews.freebsd.org/D25480
+// 3. https://ics.uci.edu/~goodrich/teach/cs165/notes/WeakAVLTrees.pdf
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC___SUPPORT_WEAK_AVL_H
>From 287cfe8d88e3ac35fe378856cd0fefc4e82f0a33 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Tue, 16 Dec 2025 05:04:29 -0500
Subject: [PATCH 07/13] update
---
libc/src/__support/weak_avl.h | 225 +++++++++++++++++++++++++++++++++-
1 file changed, 224 insertions(+), 1 deletion(-)
diff --git a/libc/src/__support/weak_avl.h b/libc/src/__support/weak_avl.h
index 7ce862d6d78b3..0b46f9179ab12 100644
--- a/libc/src/__support/weak_avl.h
+++ b/libc/src/__support/weak_avl.h
@@ -21,6 +21,7 @@
#include "src/__support/libc_assert.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
+#include <cstdint>
namespace LIBC_NAMESPACE_DECL {
@@ -60,6 +61,9 @@ template <typename T> class WeakAVLNode {
LIBC_INLINE void set_flags(uintptr_t flags) {
parent_and_flags |= (FLAGS_MASK & flags);
}
+ LIBC_INLINE void is_external() {
+ return (children[0] == nullptr) && (children[1] == nullptr);
+ }
LIBC_INLINE WeakAVLNode(T data)
: data(cpp::move(data)), parent_and_flags(0), children{nullptr, nullptr} {
}
@@ -72,6 +76,76 @@ template <typename T> class WeakAVLNode {
return nullptr;
}
+ // Unlink a node from the tree. The corresponding flag is not updated. The
+ // node is not deleted and its pointers are not cleared.
+ // FixupSite is the lowest surviving node from which rank/flag invariants may
+ // be violated.
+ struct FixupSite {
+ WeakAVLNode *parent;
+ bool is_right;
+ };
+ LIBC_INLINE static FixupSite unlink(WeakAVLNode *&root, WeakAVLNode *node) {
+ bool has_left = node->children[0] != nullptr;
+ bool has_right = node->children[1] != nullptr;
+
+ // Case 0. node has no children
+ if (!has_left && !has_right) {
+ if (node->parent() == nullptr) {
+ root = nullptr; // Tree becomes empty
+ return {nullptr, false};
+ }
+ FixupSite site = {node->parent(), node->parent()->children[1] == node};
+ site.parent->children[site.is_right] = nullptr;
+ return site;
+ }
+
+ // Case 1. node has only one child
+ if (has_left != has_right) {
+ if (!node->parent()) {
+ root = node->children[has_right];
+ root->set_parent(nullptr);
+ return {nullptr, false};
+ }
+ FixupSite site = {node->parent(), node->parent()->children[1] == node};
+ site.parent->children[site.is_right] = node->children[has_right];
+ node->children[has_right]->set_parent(site.parent);
+ return site;
+ }
+
+ // Case 2. node has two children
+ // We cannot swap fields, as tree nodes must remain valid outside.
+ WeakAVLNode *replacement = node->children[1];
+ while (replacement->children[0] != nullptr)
+ replacement = replacement->children[0];
+ bool replacement_was_right =
+ (replacement->parent()->children[1] == replacement);
+ WeakAVLNode *replacement_parent = replacement->parent();
+ WeakAVLNode *replacement_rchild = replacement->children[1];
+ replacement->set_parent(node->parent());
+ replacement->set_flags(node->flags());
+ replacement->children[0] = node->children[0];
+ replacement->children[1] = node->children[1] == replacement
+ ? replacement_rchild
+ : node->children[1];
+ if (replacement->children[0])
+ replacement->children[0]->set_parent(replacement);
+ if (replacement->children[1])
+ replacement->children[1]->set_parent(replacement);
+ if (replacement->parent()) {
+ bool node_was_right = node->parent()->children[1] == node;
+ replacement->parent()->children[node_was_right] = replacement;
+ } else
+ root = replacement;
+ FixupSite site = {replacement_parent == node ? replacement
+ : replacement_parent,
+ replacement_was_right};
+ if (replacement_rchild)
+ replacement_rchild->set_parent(site.parent);
+ if (site.parent)
+ site.parent->children[site.is_right] = replacement_rchild;
+ return site;
+ }
+
public:
LIBC_INLINE const WeakAVLNode *get_left() const { return children[0]; }
LIBC_INLINE const WeakAVLNode *get_right() const { return children[1]; }
@@ -118,6 +192,22 @@ template <typename T> class WeakAVLNode {
node->set_parent(pivot);
return pivot;
}
+
+ // Find data in the subtree rooted at root. If not found, returns nullptr.
+ // `Compare` returns integer values for ternary comparison.
+ template <typename Compare>
+ LIBC_INLINE static WeakAVLNode *find(WeakAVLNode *root, T data,
+ Compare &&comp) {
+ WeakAVLNode *cursor = root;
+ while (cursor != nullptr) {
+ int comp_result = comp(cursor->data, data);
+ if (comp_result == 0)
+ return cursor; // Node found
+ bool is_right = comp_result < 0;
+ cursor = cursor->children[is_right];
+ }
+ return nullptr; // Node not found
+ }
// Insert data into the subtree rooted at root.
// Returns the node if insertion is successful or the node exists in
// the tree.
@@ -184,7 +274,7 @@ template <typename T> class WeakAVLNode {
}
LIBC_ASSERT((node->flags() != 0b11) &&
- "there should be no 2-2 nodes in a weak AVL tree");
+ "there should be no 2-2 node along the insertion fixup path");
LIBC_ASSERT((node == allocated || node->flags() != 0) &&
"Internal node must have a child with rank-difference 2, "
@@ -246,6 +336,139 @@ template <typename T> class WeakAVLNode {
}
return allocated;
}
+
+ // Erase the node from the tree rooted at root.
+ LIBC_INLINE static void erase(WeakAVLNode *&root, WeakAVLNode *node) {
+ // Unlink the node from the tree
+ auto [cursor, is_right] = unlink(root, node);
+ ::delete node;
+ while (cursor) {
+ // Case 0. cursor previously had rank-difference 1 on the side of the
+ // deleted node. We can simply update the rank-difference and stop.
+ // Notice that this step may create 2-2 nodes, thus deviating from a
+ // "strong" AVL tree.
+ //
+ // (C) (C)
+ // X / \ 1 => X / \
+ // (*) (D) (*) \ 2
+ // (D)
+ if (!cursor->has_rank_diff_2(is_right)) {
+ cursor->toggle_rank_diff_2(is_right);
+ break;
+ }
+
+ // Case 1. cursor previously had rank-difference 2 on the side of the
+ // deleted node. Now it has rank-difference 3, which violates the
+ // weak-AVL property. We found that we have a sibling with rank-difference
+ // 2, so we can demote cursor and continue upwards.
+ //
+ // (P) (P)
+ // | X | (X + 1)
+ // (C) |
+ // / \ => (C)
+ // 2 / \ 1 / \
+ // (*) \ 3 (*) \ 2
+ // (D) (D)
+ if (cursor->has_rank_diff_2(!is_right)) {
+ cursor->toggle_rank_diff_2(!is_right);
+ if (cursor->parent())
+ is_right = (cursor->parent()->children[1] == cursor);
+ cursor = cursor->parent();
+ continue;
+ }
+
+ // Case 2. continue from Case 1; but the sibling has rank-difference 1.
+ // However, we found that the sibling is a 2-2 node. We demote both
+ // sibling and cursor, and continue upwards.
+ //
+ // (P) (P)
+ // | X | (X + 1)
+ // (C) |
+ // 1 / \ => (C)
+ // (S) \ 1 / \
+ // / \ \ 3 (S) \ 2
+ // 2 / \ 2 (D) 1 / \ 1 (D)
+ // (*) (*) (*) (*)
+ WeakAVLNode *sibling = cursor->children[!is_right];
+ LIBC_ASSERT(sibling && "rank-difference 1 sibling cannot be empty");
+ if (sibling->flags() == 0b11) {
+ sibling->clear_flags();
+ if (cursor->parent())
+ is_right = (cursor->parent()->children[1] == cursor);
+ cursor = cursor->parent();
+ continue;
+ }
+
+ // Case 3. continue from Case 2; but the sibling cannot be demoted.
+ // Sibling has a node T along the same direction with rank-difference 1.
+ //
+ // (P) (P)
+ // | X | X
+ // (C) (S)
+ // 1 / \ Rotate 2 / \ 1
+ // (S) \ => / (C)
+ // 1 / \ Y \ 3 (T) Y / \ 2
+ // (T) \ (D) (*) \
+ // (*) (D)
+ bool sibling_is_right = !is_right;
+ if (!sibling->has_rank_diff_2(sibling_is_right)) {
+ WeakAVLNode *new_subroot = rotate(root, cursor, sibling_is_right);
+ LIBC_ASSERT(new_subroot == sibling &&
+ "sibling should become the subtree root");
+ // Update flags
+ bool sibling_alter_child_has_rank_diff_2 =
+ new_subroot->has_rank_diff_2(!sibling_is_right);
+ new_subroot->clear_flags();
+ new_subroot->toggle_rank_diff_2(sibling_is_right);
+
+ // Cursor only needs to be updated if it becomes a 2-2 node
+ if (sibling_alter_child_has_rank_diff_2) {
+ // Demote a 2-2 cursor if it is also external
+ if (cursor->is_external()) {
+ cursor->clear_flags();
+ new_subroot->toggle_rank_diff_2(!sibling_is_right);
+ LIBC_ASSERT(new_subroot->flags() == 0b00 &&
+ "sibling should become a 1-1 node.");
+ } else {
+ cursor->toggle_rank_diff_2(sibling_is_right);
+ LIBC_ASSERT(cursor->flags() == 0b11 &&
+ "cursor should become a 2-2 node.");
+ }
+ }
+ break;
+ }
+ // Case 4. continue from Case 3; but rank-difference 1 child T of sibling
+ // is on the opposite direction.
+ //
+ // (P) (P)
+ // | X | X
+ // (C) Zig-Zag (T)
+ // 1 / \ => / \
+ // (S) \ 2 / \ 2
+ // / \ 1 \ 3 (S) (C)
+ // 2 / (T) (D) 1 / Y \ / Z \ 1
+ // (*) Y / \ Z (*) (A)(B) (D)
+ // (A) (B)
+ WeakAVLNode *target_child = rotate(root, sibling, !sibling_is_right);
+ uintptr_t flags = target_child->flags();
+ WeakAVLNode *new_subroot = rotate(root, cursor, sibling_is_right);
+ LIBC_ASSERT(new_subroot == target_child &&
+ "target_child should become the subtree root");
+ // Set flags
+ target_child->set_flags(0b11);
+ cursor->clear_flags();
+ sibling->clear_flags();
+ // Select destinations
+ WeakAVLNode *dst_left = sibling_is_right ? cursor : sibling;
+ WeakAVLNode *dst_right = sibling_is_right ? sibling : cursor;
+ // Masked toggles
+ if (flags & LEFT_FLAG_BIT)
+ dst_left->toggle_rank_diff_2(true);
+ if (flags & RIGHT_FLAG_BIT)
+ dst_right->toggle_rank_diff_2(false);
+ break;
+ }
+ }
};
} // namespace LIBC_NAMESPACE_DECL
>From 912a18f3efaca15641e4bddce17c878ea20cde77 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Tue, 16 Dec 2025 05:31:20 -0500
Subject: [PATCH 08/13] update
---
libc/src/__support/weak_avl.h | 11 +-
libc/test/src/__support/weak_avl_test.cpp | 152 +++++++++++++++++++---
2 files changed, 137 insertions(+), 26 deletions(-)
diff --git a/libc/src/__support/weak_avl.h b/libc/src/__support/weak_avl.h
index 0b46f9179ab12..1e6b6c4d25fe4 100644
--- a/libc/src/__support/weak_avl.h
+++ b/libc/src/__support/weak_avl.h
@@ -21,7 +21,6 @@
#include "src/__support/libc_assert.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
-#include <cstdint>
namespace LIBC_NAMESPACE_DECL {
@@ -59,10 +58,10 @@ template <typename T> class WeakAVLNode {
}
LIBC_INLINE void clear_flags() { parent_and_flags &= ~FLAGS_MASK; }
LIBC_INLINE void set_flags(uintptr_t flags) {
- parent_and_flags |= (FLAGS_MASK & flags);
+ parent_and_flags = (parent_and_flags & ~FLAGS_MASK) | (flags & FLAGS_MASK);
}
- LIBC_INLINE void is_external() {
- return (children[0] == nullptr) && (children[1] == nullptr);
+ LIBC_INLINE bool is_external() {
+ return (children[0] == nullptr) || (children[1] == nullptr);
}
LIBC_INLINE WeakAVLNode(T data)
: data(cpp::move(data)), parent_and_flags(0), children{nullptr, nullptr} {
@@ -427,8 +426,8 @@ template <typename T> class WeakAVLNode {
if (cursor->is_external()) {
cursor->clear_flags();
new_subroot->toggle_rank_diff_2(!sibling_is_right);
- LIBC_ASSERT(new_subroot->flags() == 0b00 &&
- "sibling should become a 1-1 node.");
+ LIBC_ASSERT(new_subroot->flags() == 0b11 &&
+ "sibling should become a 2-2 node.");
} else {
cursor->toggle_rank_diff_2(sibling_is_right);
LIBC_ASSERT(cursor->flags() == 0b11 &&
diff --git a/libc/test/src/__support/weak_avl_test.cpp b/libc/test/src/__support/weak_avl_test.cpp
index daddc047ab9f0..18384ed07265a 100644
--- a/libc/test/src/__support/weak_avl_test.cpp
+++ b/libc/test/src/__support/weak_avl_test.cpp
@@ -13,15 +13,19 @@ using Node = LIBC_NAMESPACE::WeakAVLNode<int>;
namespace {
-// Validate weak-AVL rank-difference invariant:
-// no node may have rank-difference 2 on both sides.
-bool validate(const Node *node) {
+// Validate the weak-AVL invariant that holds under **pure insertion only**
+// (i.e. no erasure has occurred): no node has rank-difference 2 on both sides.
+//
+// NOTE: This validator is intentionally *not* valid after erase(): deletion
+// may legitimately leave 2-2 nodes in a weak-AVL tree, which this check
+// would reject.
+bool validate_pure_insertion(const Node *node) {
if (!node)
return true;
bool left_2 = node->is_rank_diff_2(false);
bool right_2 = node->is_rank_diff_2(true);
- return (!left_2 || !right_2) && validate(node->get_left()) &&
- validate(node->get_right());
+ return (!left_2 || !right_2) && validate_pure_insertion(node->get_left()) &&
+ validate_pure_insertion(node->get_right());
}
// Insert according to pattern `next(i)`
@@ -35,7 +39,6 @@ static Node *build_tree(NextFn next, int N, int (*compare)(int, int)) {
}
// Insertion patterns
-
static int seq(int i) { return i; }
static int rev(int i) {
@@ -44,12 +47,19 @@ static int rev(int i) {
}
// Coprime stride permutation: i -> (i * X) % N
-static int stride(int i) {
+static int stride(int i, int prime = 7919) {
constexpr int N = 1000;
- constexpr int X = 7919; // gcd(X, N) = 1
- return (i * X) % N;
+ return (i * prime) % N;
}
+// Thin wrappers to make test intent explicit.
+template <typename Compare>
+static Node *find(Node *root, int value, Compare &&comp) {
+ return Node::find(root, value, comp);
+}
+
+static void erase(Node *&root, Node *node) { Node::erase(root, node); }
+
} // namespace
TEST(LlvmLibcWeakAVLTest, SimpleInsertion) {
@@ -59,19 +69,19 @@ TEST(LlvmLibcWeakAVLTest, SimpleInsertion) {
Node *node10 = Node::find_or_insert(root, 10, compare);
ASSERT_TRUE(node10 != nullptr);
ASSERT_EQ(root, node10);
- ASSERT_TRUE(validate(root));
+ ASSERT_TRUE(validate_pure_insertion(root));
Node *node5 = Node::find_or_insert(root, 5, compare);
ASSERT_TRUE(node5 != nullptr);
- ASSERT_TRUE(validate(root));
+ ASSERT_TRUE(validate_pure_insertion(root));
Node *node15 = Node::find_or_insert(root, 15, compare);
ASSERT_TRUE(node15 != nullptr);
- ASSERT_TRUE(validate(root));
+ ASSERT_TRUE(validate_pure_insertion(root));
Node *node10_again = Node::find_or_insert(root, 10, compare);
ASSERT_EQ(node10, node10_again);
- ASSERT_TRUE(validate(root));
+ ASSERT_TRUE(validate_pure_insertion(root));
Node::destroy(root);
}
@@ -81,7 +91,7 @@ TEST(LlvmLibcWeakAVLTest, SequentialInsertion) {
constexpr int N = 1000;
Node *root = build_tree(seq, N, compare);
- ASSERT_TRUE(validate(root));
+ ASSERT_TRUE(validate_pure_insertion(root));
for (int i = 0; i < N; ++i) {
Node *node = Node::find_or_insert(root, i, compare);
@@ -89,7 +99,7 @@ TEST(LlvmLibcWeakAVLTest, SequentialInsertion) {
ASSERT_EQ(node->get_data(), i);
}
- ASSERT_TRUE(validate(root));
+ ASSERT_TRUE(validate_pure_insertion(root));
Node::destroy(root);
}
@@ -98,7 +108,7 @@ TEST(LlvmLibcWeakAVLTest, ReversedInsertion) {
constexpr int N = 1000;
Node *root = build_tree(rev, N, compare);
- ASSERT_TRUE(validate(root));
+ ASSERT_TRUE(validate_pure_insertion(root));
for (int i = 0; i < N; ++i) {
Node *node = Node::find_or_insert(root, i, compare);
@@ -106,7 +116,7 @@ TEST(LlvmLibcWeakAVLTest, ReversedInsertion) {
ASSERT_EQ(node->get_data(), i);
}
- ASSERT_TRUE(validate(root));
+ ASSERT_TRUE(validate_pure_insertion(root));
Node::destroy(root);
}
@@ -114,8 +124,8 @@ TEST(LlvmLibcWeakAVLTest, StridedInsertion) {
auto compare = [](int a, int b) { return a - b; };
constexpr int N = 1000;
- Node *root = build_tree(stride, N, compare);
- ASSERT_TRUE(validate(root));
+ Node *root = build_tree([](int i) { return stride(i); }, N, compare);
+ ASSERT_TRUE(validate_pure_insertion(root));
for (int i = 0; i < N; ++i) {
Node *node = Node::find_or_insert(root, i, compare);
@@ -123,6 +133,108 @@ TEST(LlvmLibcWeakAVLTest, StridedInsertion) {
ASSERT_EQ(node->get_data(), i);
}
- ASSERT_TRUE(validate(root));
+ ASSERT_TRUE(validate_pure_insertion(root));
+ Node::destroy(root);
+}
+
+TEST(LlvmLibcWeakAVLTest, FindExistingAndMissing) {
+ auto compare = [](int a, int b) { return a - b; };
+ constexpr int N = 1000;
+
+ Node *root = build_tree(seq, N, compare);
+ ASSERT_TRUE(validate_pure_insertion(root));
+
+ for (int i = 0; i < N; ++i) {
+ Node *node = find(root, i, compare);
+ ASSERT_TRUE(node != nullptr);
+ ASSERT_EQ(node->get_data(), i);
+ }
+
+ ASSERT_TRUE(find(root, -1, compare) == nullptr);
+ ASSERT_TRUE(find(root, N, compare) == nullptr);
+ ASSERT_TRUE(find(root, 2 * N, compare) == nullptr);
+
+ Node::destroy(root);
+}
+
+TEST(LlvmLibcWeakAVLTest, SequentialErase) {
+ auto compare = [](int a, int b) { return a - b; };
+ constexpr int N = 1000;
+
+ Node *root = build_tree(seq, N, compare);
+
+ for (int i = 0; i < N; ++i) {
+ Node *node = find(root, i, compare);
+ ASSERT_TRUE(node != nullptr);
+
+ erase(root, node);
+ ASSERT_TRUE(find(root, i, compare) == nullptr);
+ }
+
+ ASSERT_TRUE(root == nullptr);
+}
+
+TEST(LlvmLibcWeakAVLTest, ReverseErase) {
+ auto compare = [](int a, int b) { return a - b; };
+ constexpr int N = 1000;
+
+ Node *root = build_tree(seq, N, compare);
+
+ for (int i = N - 1; i >= 0; --i) {
+ Node *node = find(root, i, compare);
+ ASSERT_TRUE(node != nullptr);
+
+ erase(root, node);
+ ASSERT_TRUE(find(root, i, compare) == nullptr);
+ }
+
+ ASSERT_TRUE(root == nullptr);
+}
+
+TEST(LlvmLibcWeakAVLTest, StridedErase) {
+ auto compare = [](int a, int b) { return a - b; };
+ constexpr int N = 1000;
+
+ Node *root = build_tree(seq, N, compare);
+
+ for (int i = 0; i < N; ++i) {
+ int key = stride(i, 5261);
+ Node *node = find(root, key, compare);
+ ASSERT_TRUE(node != nullptr);
+
+ erase(root, node);
+ ASSERT_TRUE(find(root, key, compare) == nullptr);
+ }
+
+ ASSERT_TRUE(root == nullptr);
+}
+
+TEST(LlvmLibcWeakAVLTest, EraseStructuralCases) {
+ auto compare = [](int a, int b) { return a - b; };
+
+ Node *root = nullptr;
+ int keys[] = {10, 5, 15, 3, 7, 12, 18};
+
+ for (int k : keys)
+ Node::find_or_insert(root, k, compare);
+
+ // Erase leaf.
+ erase(root, find(root, 3, compare));
+ ASSERT_TRUE(find(root, 3, compare) == nullptr);
+
+ // Erase internal nodes.
+ erase(root, find(root, 5, compare));
+ ASSERT_TRUE(find(root, 5, compare) == nullptr);
+
+ erase(root, find(root, 10, compare));
+ ASSERT_TRUE(find(root, 10, compare) == nullptr);
+
+ int attempts[] = {7, 12, 15, 18};
+ for (int k : attempts) {
+ Node *n = find(root, k, compare);
+ ASSERT_TRUE(n != nullptr);
+ ASSERT_EQ(n->get_data(), k);
+ }
+
Node::destroy(root);
}
>From 0e6d925313e9db6642cfa18218d4f601e026c811 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Tue, 16 Dec 2025 05:47:34 -0500
Subject: [PATCH 09/13] update
---
libc/src/__support/weak_avl.h | 90 ++++++++++++++++++++---------------
1 file changed, 51 insertions(+), 39 deletions(-)
diff --git a/libc/src/__support/weak_avl.h b/libc/src/__support/weak_avl.h
index 1e6b6c4d25fe4..b8c4c9811f4c9 100644
--- a/libc/src/__support/weak_avl.h
+++ b/libc/src/__support/weak_avl.h
@@ -87,10 +87,10 @@ template <typename T> class WeakAVLNode {
bool has_left = node->children[0] != nullptr;
bool has_right = node->children[1] != nullptr;
- // Case 0. node has no children
+ // Case 0: no children
if (!has_left && !has_right) {
- if (node->parent() == nullptr) {
- root = nullptr; // Tree becomes empty
+ if (!node->parent()) {
+ root = nullptr;
return {nullptr, false};
}
FixupSite site = {node->parent(), node->parent()->children[1] == node};
@@ -98,50 +98,62 @@ template <typename T> class WeakAVLNode {
return site;
}
- // Case 1. node has only one child
+ // Case 1: one child
if (has_left != has_right) {
+ WeakAVLNode *child = node->children[has_right];
if (!node->parent()) {
- root = node->children[has_right];
- root->set_parent(nullptr);
+ root = child;
+ child->set_parent(nullptr);
return {nullptr, false};
}
FixupSite site = {node->parent(), node->parent()->children[1] == node};
- site.parent->children[site.is_right] = node->children[has_right];
- node->children[has_right]->set_parent(site.parent);
+ site.parent->children[site.is_right] = child;
+ child->set_parent(site.parent);
return site;
}
- // Case 2. node has two children
- // We cannot swap field as tree node must be valid outside.
- WeakAVLNode *replacement = node->children[1];
- while (replacement->children[0] != nullptr)
- replacement = replacement->children[0];
- bool replacement_was_right =
- (replacement->parent()->children[1] == replacement);
- WeakAVLNode *replacement_parent = replacement->parent();
- WeakAVLNode *replacement_rchild = replacement->children[1];
- replacement->set_parent(node->parent());
- replacement->set_flags(node->flags());
- replacement->children[0] = node->children[0];
- replacement->children[1] = node->children[1] == replacement
- ? replacement_rchild
- : node->children[1];
- if (replacement->children[0])
- replacement->children[0]->set_parent(replacement);
- if (replacement->children[1])
- replacement->children[1]->set_parent(replacement);
- if (replacement->parent()) {
- bool node_was_right = node->parent()->children[1] == node;
- replacement->parent()->children[node_was_right] = replacement;
- } else
- root = replacement;
- FixupSite site = {replacement_parent == node ? replacement
- : replacement_parent,
- replacement_was_right};
- if (replacement_rchild)
- replacement_rchild->set_parent(site.parent);
- if (site.parent)
- site.parent->children[site.is_right] = replacement_rchild;
+ // Case 2: two children: replace by successor (leftmost in right subtree)
+ WeakAVLNode *succ = node->children[1];
+ while (succ->children[0])
+ succ = succ->children[0];
+
+ WeakAVLNode *succ_parent = succ->parent();
+ bool succ_was_right =
+ succ_parent->children[1] == succ; // true only if succ_parent==node
+ WeakAVLNode *succ_rchild = succ->children[1];
+
+ // 1) Splice successor out of its old position (flags intentionally
+ // unchanged)
+ FixupSite site = {succ_parent, succ_was_right};
+ succ_parent->children[succ_was_right] = succ_rchild;
+ if (succ_rchild)
+ succ_rchild->set_parent(succ_parent);
+
+ // 2) Transplant successor into node's position
+ succ->set_parent(node->parent());
+ succ->set_flags(
+ node->flags()); // key-swap emulation: successor takes node's rank/flags
+
+ succ->children[0] = node->children[0];
+ succ->children[1] = node->children[1];
+ if (succ->children[0])
+ succ->children[0]->set_parent(succ);
+ if (succ->children[1])
+ succ->children[1]->set_parent(succ);
+
+ if (succ->parent()) {
+ bool node_was_right = succ->parent()->children[1] == node;
+ succ->parent()->children[node_was_right] = succ;
+ } else {
+ root = succ;
+ }
+
+ // 3) If the physical removal was under `node`, fixup parent must be the
+ // successor (since `node` is deleted and successor now occupies that
+ // spot).
+ if (site.parent == node)
+ site.parent = succ;
+
return site;
}
>From a4b2e83664a8f6856f62050dd1e22cdc6c15fded Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Tue, 16 Dec 2025 06:42:51 -0500
Subject: [PATCH 10/13] update
---
libc/src/__support/weak_avl.h | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/libc/src/__support/weak_avl.h b/libc/src/__support/weak_avl.h
index b8c4c9811f4c9..f3a3adf67ce75 100644
--- a/libc/src/__support/weak_avl.h
+++ b/libc/src/__support/weak_avl.h
@@ -60,8 +60,8 @@ template <typename T> class WeakAVLNode {
LIBC_INLINE void set_flags(uintptr_t flags) {
parent_and_flags = (parent_and_flags & ~FLAGS_MASK) | (flags & FLAGS_MASK);
}
- LIBC_INLINE bool is_external() {
- return (children[0] == nullptr) || (children[1] == nullptr);
+ LIBC_INLINE bool is_leaf() {
+ return (children[0] == nullptr) && (children[1] == nullptr);
}
LIBC_INLINE WeakAVLNode(T data)
: data(cpp::move(data)), parent_and_flags(0), children{nullptr, nullptr} {
@@ -365,6 +365,14 @@ template <typename T> class WeakAVLNode {
// (D)
if (!cursor->has_rank_diff_2(is_right)) {
cursor->toggle_rank_diff_2(is_right);
+ // If we created a 2-2 leaf, we must demote it and continue.
+ if (cursor->flags() == 0b11 && cursor->is_leaf()) {
+ cursor->clear_flags();
+ if (cursor->parent())
+ is_right = (cursor->parent()->children[1] == cursor);
+ cursor = cursor->parent();
+ continue;
+ }
break;
}
@@ -434,8 +442,8 @@ template <typename T> class WeakAVLNode {
// Cursor only needs to be updated if it become a 2-2 node
if (sibling_alter_child_has_rank_diff_2) {
- // Demote a 2-2 cursor if it is also external
- if (cursor->is_external()) {
+ // Demote a 2-2 cursor if it is a leaf
+ if (cursor->is_leaf()) {
cursor->clear_flags();
new_subroot->toggle_rank_diff_2(!sibling_is_right);
LIBC_ASSERT(new_subroot->flags() == 0b11 &&
>From 7ca40c642b268350ab0cf656d1300d71a7167298 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Tue, 16 Dec 2025 07:17:59 -0500
Subject: [PATCH 11/13] add fuzzing
---
libc/fuzzing/__support/CMakeLists.txt | 8 ++
libc/fuzzing/__support/weak_avl_fuzz.cpp | 94 ++++++++++++++++++++++++
2 files changed, 102 insertions(+)
create mode 100644 libc/fuzzing/__support/weak_avl_fuzz.cpp
diff --git a/libc/fuzzing/__support/CMakeLists.txt b/libc/fuzzing/__support/CMakeLists.txt
index 9c674d2fb0d65..f9b49cc1b0a4b 100644
--- a/libc/fuzzing/__support/CMakeLists.txt
+++ b/libc/fuzzing/__support/CMakeLists.txt
@@ -25,6 +25,14 @@ add_libc_fuzzer(
-D__LIBC_EXPLICIT_SIMD_OPT
)
+add_libc_fuzzer(
+ weak_avl_fuzz
+ SRCS
+ weak_avl_fuzz.cpp
+ DEPENDS
+ libc.src.__support.weak_avl
+)
+
# TODO: FreeListHeap uses the _end symbol which conflicts with the _end symbol
# defined by GPU start.cpp files so for now we exclude this fuzzer on GPU.
if(LLVM_LIBC_FULL_BUILD AND NOT LIBC_TARGET_OS_IS_GPU)
diff --git a/libc/fuzzing/__support/weak_avl_fuzz.cpp b/libc/fuzzing/__support/weak_avl_fuzz.cpp
new file mode 100644
index 0000000000000..7c6e50d1fa252
--- /dev/null
+++ b/libc/fuzzing/__support/weak_avl_fuzz.cpp
@@ -0,0 +1,94 @@
+//===-- weak_avl_fuzz.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Fuzzing test for llvm-libc weak AVL implementations.
+///
+//===----------------------------------------------------------------------===//
+#include "hdr/types/ENTRY.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/weak_avl.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// A sequence of actions, each one opcode byte followed by an int key:
+// - Erase: opcode congruent to 5 or 6 (mod 7)
+// - Find: opcode congruent to 4 (mod 7)
+// - FindOrInsert: opcode congruent to 0, 1, 2 or 3 (mod 7)
+extern "C" size_t LLVMFuzzerMutate(uint8_t *data, size_t size, size_t max_size);
+extern "C" size_t LLVMFuzzerCustomMutator(uint8_t *data, size_t size,
+ size_t max_size, unsigned int seed) {
+ size = LLVMFuzzerMutate(data, size, max_size);
+ return size / (1 + sizeof(int)) * (1 + sizeof(int));
+}
+
+class AVLTree {
+ using Node = WeakAVLNode<int>;
+ Node *root = nullptr;
+ bool reversed = false;
+ static int compare(int a, int b) { return (a > b) - (a < b); }
+ static int reverse_compare(int a, int b) { return (b > a) - (b < a); }
+
+public:
+ AVLTree(bool reversed = false) : reversed(reversed) {}
+ bool find(int key) {
+ return Node::find(root, key, reversed ? reverse_compare : compare);
+ }
+ bool find_or_insert(int key) {
+ return Node::find_or_insert(root, key,
+ reversed ? reverse_compare : compare);
+ }
+ bool erase(int key) {
+ Node *node = Node::find(root, key, reversed ? reverse_compare : compare);
+ if (node)
+ Node::erase(root, node);
+ return node;
+ }
+ ~AVLTree() { Node::destroy(root); }
+};
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ AVLTree tree1;
+ AVLTree tree2(true);
+ for (size_t i = 0; i + (1 + sizeof(int)) <= size; i += 1 + sizeof(int)) {
+ uint8_t action = data[i];
+ int key;
+ __builtin_memcpy(&key, data + i + 1, sizeof(int));
+ if (action % 7 == 4) {
+ // Find
+ bool res1 = tree1.find(key);
+ bool res2 = tree2.find(key);
+ if (res1 != res2)
+ __builtin_trap();
+
+ } else if (action % 7 == 5 || action % 7 == 6) {
+ // Erase
+ bool res1 = tree1.erase(key);
+ bool res2 = tree2.erase(key);
+ if (res1 != res2)
+ __builtin_trap();
+ if (tree1.find(key))
+ __builtin_trap();
+ if (tree2.find(key))
+ __builtin_trap();
+ } else {
+ // FindOrInsert
+ bool res1 = tree1.find_or_insert(key);
+ bool res2 = tree2.find_or_insert(key);
+ if (res1 != res2)
+ __builtin_trap();
+ if (!tree1.find(key))
+ __builtin_trap();
+ if (!tree2.find(key))
+ __builtin_trap();
+ }
+ }
+ return 0;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
>From 5c190538cd412776970648873e12f0fec9e7d6f4 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Tue, 16 Dec 2025 07:42:06 -0500
Subject: [PATCH 12/13] reduce size for hermetic test
---
libc/test/src/__support/weak_avl_test.cpp | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/libc/test/src/__support/weak_avl_test.cpp b/libc/test/src/__support/weak_avl_test.cpp
index 18384ed07265a..93fc6498d1f8b 100644
--- a/libc/test/src/__support/weak_avl_test.cpp
+++ b/libc/test/src/__support/weak_avl_test.cpp
@@ -42,13 +42,13 @@ static Node *build_tree(NextFn next, int N, int (*compare)(int, int)) {
static int seq(int i) { return i; }
static int rev(int i) {
- constexpr int N = 1000;
+ constexpr int N = 256;
return N - 1 - i;
}
// Coprime stride permutation: i -> (i * X) % N
static int stride(int i, int prime = 7919) {
- constexpr int N = 1000;
+ constexpr int N = 256;
return (i * prime) % N;
}
@@ -88,7 +88,7 @@ TEST(LlvmLibcWeakAVLTest, SimpleInsertion) {
TEST(LlvmLibcWeakAVLTest, SequentialInsertion) {
auto compare = [](int a, int b) { return a - b; };
- constexpr int N = 1000;
+ constexpr int N = 256;
Node *root = build_tree(seq, N, compare);
ASSERT_TRUE(validate_pure_insertion(root));
@@ -105,7 +105,7 @@ TEST(LlvmLibcWeakAVLTest, SequentialInsertion) {
TEST(LlvmLibcWeakAVLTest, ReversedInsertion) {
auto compare = [](int a, int b) { return a - b; };
- constexpr int N = 1000;
+ constexpr int N = 256;
Node *root = build_tree(rev, N, compare);
ASSERT_TRUE(validate_pure_insertion(root));
@@ -122,7 +122,7 @@ TEST(LlvmLibcWeakAVLTest, ReversedInsertion) {
TEST(LlvmLibcWeakAVLTest, StridedInsertion) {
auto compare = [](int a, int b) { return a - b; };
- constexpr int N = 1000;
+ constexpr int N = 256;
Node *root = build_tree([](int i) { return stride(i); }, N, compare);
ASSERT_TRUE(validate_pure_insertion(root));
@@ -139,7 +139,7 @@ TEST(LlvmLibcWeakAVLTest, StridedInsertion) {
TEST(LlvmLibcWeakAVLTest, FindExistingAndMissing) {
auto compare = [](int a, int b) { return a - b; };
- constexpr int N = 1000;
+ constexpr int N = 256;
Node *root = build_tree(seq, N, compare);
ASSERT_TRUE(validate_pure_insertion(root));
@@ -159,7 +159,7 @@ TEST(LlvmLibcWeakAVLTest, FindExistingAndMissing) {
TEST(LlvmLibcWeakAVLTest, SequentialErase) {
auto compare = [](int a, int b) { return a - b; };
- constexpr int N = 1000;
+ constexpr int N = 256;
Node *root = build_tree(seq, N, compare);
@@ -176,7 +176,7 @@ TEST(LlvmLibcWeakAVLTest, SequentialErase) {
TEST(LlvmLibcWeakAVLTest, ReverseErase) {
auto compare = [](int a, int b) { return a - b; };
- constexpr int N = 1000;
+ constexpr int N = 256;
Node *root = build_tree(seq, N, compare);
@@ -193,7 +193,7 @@ TEST(LlvmLibcWeakAVLTest, ReverseErase) {
TEST(LlvmLibcWeakAVLTest, StridedErase) {
auto compare = [](int a, int b) { return a - b; };
- constexpr int N = 1000;
+ constexpr int N = 256;
Node *root = build_tree(seq, N, compare);
>From 38ad8621f70a728ddf302a2b596c88a3759227a6 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Tue, 16 Dec 2025 07:48:55 -0500
Subject: [PATCH 13/13] improve doc
---
libc/src/__support/weak_avl.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/libc/src/__support/weak_avl.h b/libc/src/__support/weak_avl.h
index f3a3adf67ce75..c0680be9bbbf6 100644
--- a/libc/src/__support/weak_avl.h
+++ b/libc/src/__support/weak_avl.h
@@ -79,6 +79,9 @@ template <typename T> class WeakAVLNode {
// not deleted and its pointers are not cleared.
// FixupSite is the lowest surviving node from which rank/flag invariants may
// be violated.
+ // Our tree requires values to stay in their nodes to keep addresses stable.
+ // This complicates the unlink operation as the successor transplanting needs
+ // to update all the pointers and flags.
struct FixupSite {
WeakAVLNode *parent;
bool is_right;
@@ -144,9 +147,8 @@ template <typename T> class WeakAVLNode {
if (succ->parent()) {
bool node_was_right = succ->parent()->children[1] == node;
succ->parent()->children[node_was_right] = succ;
- } else {
+ } else
root = succ;
- }
// 3) If the physical removal was under `node`, fixup parent must be the
// successor (since `node` is deleted and successor now occupies that
More information about the libc-commits
mailing list