[libc-commits] [libc] [libc][tsearch] add weak AVL tree for tsearch implementation (PR #172411)

Michael Jones via libc-commits libc-commits at lists.llvm.org
Wed Dec 17 11:02:38 PST 2025


================
@@ -0,0 +1,574 @@
+//===-- Implementation header for weak AVL tree -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_WEAK_AVL_H
+#define LLVM_LIBC_SRC___SUPPORT_WEAK_AVL_H
+
+#include "hdr/stdint_proxy.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/new.h"
+#include "src/__support/CPP/utility/move.h"
+#include "src/__support/libc_assert.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// A general self-balancing binary search tree where node pointers can
+// be used as stable handles to the stored values.
+//
+// The self-balancing strategy is the Weak AVL (WAVL) tree, based on the
+// following foundational references:
+// 1. https://maskray.me/blog/2025-12-14-weak-avl-tree
+// 2. https://reviews.freebsd.org/D25480
+// 3. https://ics.uci.edu/~goodrich/teach/cs165/notes/WeakAVLTrees.pdf
+// 4. https://dl.acm.org/doi/10.1145/2689412 (Rank-Balanced Trees)
+//
+// WAVL trees belong to the rank-balanced binary search tree framework
+// (reference 4), alongside AVL and Red-Black trees.
+//
+// Key Properties of WAVL Trees:
+// 1. Relationship to Red-Black Trees: A WAVL tree can always be colored as a
+//    Red-Black tree.
+// 2. Relationship to AVL Trees: An AVL tree meets all the requirements of a
+//    WAVL tree. Insertion-only WAVL trees maintain the same structure as AVL
+//    trees.
+//
+// Rank-Based Balancing:
+// In rank-balanced trees, each node is assigned a rank (conceptually similar
+// to height). The rank difference between a parent and its child is
+// strictly enforced to be either **1** or **2**.
+//
+// - **AVL Trees:** Rank is equivalent to height. The strict condition is that
+//   there are no 2-2 nodes (a parent with rank difference 2 to both children).
+// - **WAVL Trees:** The no 2-2 node rule is relaxed for internal nodes during
+//   the deletion fixup process, making WAVL trees less strictly balanced than
+//   AVL trees but easier to maintain than Red-Black trees.
+//
+// Balancing Mechanics (Promotion/Demotion):
+// - **Null nodes** are considered to have rank -1.
+// - **External/leaf nodes** have rank 0.
+// - **Insertion:** Inserting a node may create a situation where a parent and
+//   child have the same rank (difference 0). This is fixed by **promoting** the
+//   rank of the parent and propagating the fix upwards, using at most two
+//   rotations (trinode fixup).
+// - **Deletion:** Deleting a node may result in a parent being 3 ranks higher
+//   than a child (difference 3). This is fixed by **demoting** the parent's
+//   rank and propagating the fix upwards.
+//
+// Implementation Detail:
+// The rank is **implicitly** maintained. We never store the full rank. Instead,
+// a 2-bit tag is used on each node to record the rank difference to each child:
+// - Bit cleared (0) -> Rank difference is **1**.
+// - Bit set (1)     -> Rank difference is **2**.
+template <typename T> class WeakAVLNode {
+  // Data
+  T data;
+
+  // Parent pointer
+  WeakAVLNode *parent;
+
+  // Children pointers
+  WeakAVLNode *children[2];
+
+  // Flags
+  unsigned char left_rank_diff_2 : 1;
+  unsigned char right_rank_diff_2 : 1;
+
+  // True when the node has neither a left nor a right child.
+  // Const-qualified like the other read-only predicates so that it can also be
+  // queried through a pointer or reference to a const node.
+  LIBC_INLINE bool is_leaf() const {
+    return (children[0] == nullptr) && (children[1] == nullptr);
+  }
+
+  // Flip the rank-difference flag of the child on the requested side
+  // (difference 1 <-> difference 2).
+  LIBC_INLINE void toggle_rank_diff_2(bool is_right) {
+    if (!is_right) {
+      left_rank_diff_2 ^= 1;
+      return;
+    }
+    right_rank_diff_2 ^= 1;
+  }
+
+  // True when both children are at rank-difference 2 (a "2-2" node).
+  LIBC_INLINE bool both_flags_set() const {
+    if (!left_rank_diff_2)
+      return false;
+    return right_rank_diff_2 != 0;
+  }
+
+  // True when at least one child is at rank-difference 2.
+  LIBC_INLINE bool any_flag_set() const {
+    if (left_rank_diff_2)
+      return true;
+    return right_rank_diff_2 != 0;
+  }
+
+  // Mark both children as being at rank-difference 1.
+  LIBC_INLINE void clear_flags() {
+    left_rank_diff_2 = right_rank_diff_2 = 0;
+  }
+
+  // Mark both children as being at rank-difference 2.
+  LIBC_INLINE void set_both_flags() {
+    left_rank_diff_2 = right_rank_diff_2 = 1;
+  }
+
+  LIBC_INLINE WeakAVLNode(T data)
+      : data(cpp::move(data)), parent(nullptr), children{nullptr, nullptr},
+        left_rank_diff_2(0), right_rank_diff_2(0) {}
+
+  // Allocate a new, detached node holding `value`.
+  // Returns the node on success, or nullptr if the allocation failed (as
+  // reported by the AllocChecker passed to operator new).
+  LIBC_INLINE static WeakAVLNode *create(T value) {
+    AllocChecker ac;
+    // `value` is already this function's own by-value copy, so hand it to the
+    // constructor by move rather than copying it a second time. This also
+    // keeps the function usable with move-only payload types.
+    WeakAVLNode *res = ::new (ac) WeakAVLNode(cpp::move(value));
+    if (ac)
+      return res;
+    return nullptr;
+  }
+
+  // Unlink a node from the tree. The corresponding flag is not updated. The
+  // node is not deleted and its pointers are not cleared.
+  // FixupSite is the lowest surviving node from which rank/flag invariants may
+  // be violated.
+  // Our tree requires values to stay in their nodes to maintain stable
+  // addresses. This complicates the unlink operation, as transplanting the
+  // successor needs to update all the pointers and flags.
+  struct FixupSite {
+    WeakAVLNode *parent; // Node to start the fixup from; nullptr if there is none
+    bool is_right;       // True if the removal happened in parent's right slot
+  };
+  LIBC_INLINE static FixupSite unlink(WeakAVLNode *&root, WeakAVLNode *node) {
+    bool has_left = node->children[0] != nullptr;
+    bool has_right = node->children[1] != nullptr;
+
+    // Case 0: no children, so the slot that held `node` simply becomes empty
+    if (!has_left && !has_right) {
+      if (!node->parent) {
+        root = nullptr; // `node` was the only node in the tree
+        return {nullptr, false};
+      }
+      FixupSite site = {node->parent, node->parent->children[1] == node};
+      site.parent->children[site.is_right] = nullptr;
+      return site;
+    }
+
+    // Case 1: exactly one child, which takes over the position of `node`
+    if (has_left != has_right) {
+      WeakAVLNode *child = node->children[has_right]; // the only non-null child
+      if (!node->parent) {
+        root = child;
+        child->parent = nullptr;
+        return {nullptr, false};
+      }
+      FixupSite site = {node->parent, node->parent->children[1] == node};
+      site.parent->children[site.is_right] = child;
+      child->parent = site.parent;
+      return site;
+    }
+
+    // Case 2: two children: replace by successor (leftmost in right subtree)
+    WeakAVLNode *succ = node->children[1];
+    while (succ->children[0])
+      succ = succ->children[0];
+
+    WeakAVLNode *succ_parent = succ->parent;
+    bool succ_was_right =
+        succ_parent->children[1] == succ; // true only if succ_parent == node
+    WeakAVLNode *succ_rchild = succ->children[1];
+
+    // 1) Splice the successor out of its old position; its right child (if
+    //    any) takes that slot. Flags of succ_parent are intentionally unchanged.
+    FixupSite site = {succ_parent, succ_was_right};
+    succ_parent->children[succ_was_right] = succ_rchild;
+    if (succ_rchild)
+      succ_rchild->parent = succ_parent;
+
+    // 2) Transplant the successor into node's position, inheriting its flags
+    succ->parent = node->parent;
+    succ->left_rank_diff_2 = node->left_rank_diff_2;
+    succ->right_rank_diff_2 = node->right_rank_diff_2;
+
+    succ->children[0] = node->children[0];
+    succ->children[1] = node->children[1];
+    if (succ->children[0])
+      succ->children[0]->parent = succ;
+    if (succ->children[1])
+      succ->children[1]->parent = succ;
+
+    if (succ->parent) {
+      bool node_was_right = succ->parent->children[1] == node;
+      succ->parent->children[node_was_right] = succ;
+    } else
+      root = succ;
+
+    // 3) If the physical removal was directly under `node`, the fixup parent
+    //    must be the successor (since `node` is being removed and the successor
+    //    now occupies that spot).
+    if (site.parent == node)
+      site.parent = succ;
+
+    return site;
+  }
+
+public:
+  // Read-only access to the left child (children[0]).
+  LIBC_INLINE const WeakAVLNode *get_left() const {
+    const WeakAVLNode *left_child = children[0];
+    return left_child;
+  }
+  // Read-only access to the right child (children[1]).
+  LIBC_INLINE const WeakAVLNode *get_right() const {
+    const WeakAVLNode *right_child = children[1];
+    return right_child;
+  }
+  // Read-only reference to the value stored in this node.
+  LIBC_INLINE const T &get_data() const {
+    return this->data;
+  }
+  // Report whether the child on the given side is at rank-difference 2.
+  LIBC_INLINE bool has_rank_diff_2(bool is_right) const {
+    if (is_right)
+      return right_rank_diff_2 != 0;
+    return left_rank_diff_2 != 0;
+  }
+
+  // Destroy the subtree rooted at node
+  LIBC_INLINE static void destroy(WeakAVLNode *node) {
+    if (node != nullptr) {
+      // Post-order: release both subtrees before the node that links them.
+      destroy(node->children[0]);
+      destroy(node->children[1]);
+      ::delete node;
+    }
+  }
+  // Rotate the subtree rooted at node in the given direction.
+  //
+  // Illustration for is_right = true (Left Rotation):
+  //
+  //          (Node)                       (Pivot)
+  //          /    \                       /     \
+  //         A   (Pivot)       =>       (Node)    C
+  //             /     \                /    \
+  //            B       C              A      B
+  //
+  LIBC_INLINE static WeakAVLNode *rotate(WeakAVLNode *&root, WeakAVLNode *node,
+                                         bool is_right) {
+    WeakAVLNode *const upper = node->parent;
+    WeakAVLNode *const riser = node->children[is_right];
+    WeakAVLNode *const inner = riser->children[!is_right];
+
+    // Hook the rising child into the slot that `node` used to occupy; it
+    // becomes the tree root when `node` had no parent.
+    riser->parent = upper;
+    if (upper != nullptr) {
+      const bool node_slot = (upper->children[1] == node);
+      upper->children[node_slot] = riser;
+    } else {
+      root = riser;
+    }
+
+    // The rising child's inner subtree is handed over to `node`.
+    node->children[is_right] = inner;
+    if (inner != nullptr)
+      inner->parent = node;
+
+    // Finally `node` descends to become a child of the rising node.
+    riser->children[!is_right] = node;
+    node->parent = riser;
+    return riser;
+  }
+
+  // Find data in the subtree rooted at root. If not found, returns nullptr.
+  // `Compare` returns integer values for ternary comparison.
+  template <typename Compare>
+  LIBC_INLINE static WeakAVLNode *find(WeakAVLNode *root, T data,
+                                       Compare comp) {
+    // Descend from the root, letting the sign of the comparison pick the side.
+    for (WeakAVLNode *walker = root; walker != nullptr;) {
+      const int order = comp(walker->data, data);
+      if (order == 0)
+        return walker; // Exact match
+      // A negative result sends the search to the right child (index 1).
+      walker = walker->children[order < 0 ? 1 : 0];
+    }
+    return nullptr; // Ran off the tree without finding a match
+  }
+  // Insert data into the subtree rooted at root.
+  // Returns the node if insertion is successful or the node exists in
+  // the tree.
+  // Returns nullptr if memory allocation fails.
+  // `Compare` returns integer values for ternary comparison.
+  template <typename Compare>
+  LIBC_INLINE static WeakAVLNode *find_or_insert(WeakAVLNode *&root, T data,
+                                                 Compare comp) {
+    WeakAVLNode *parent = nullptr, *cursor = root;
+    bool is_right = false;
+    while (cursor != nullptr) {
+      parent = cursor;
+      int comp_result = comp(parent->data, data);
+      if (comp_result == 0)
+        return parent; // Node already exists
+      is_right = comp_result < 0;
+      cursor = cursor->children[is_right];
+    }
+    WeakAVLNode *allocated = create(cpp::move(data));
+    if (!allocated)
+      return nullptr;
+    WeakAVLNode *node = allocated;
+    node->parent = parent;
+
+    // Case 0: inserting into an empty tree
+    if (!parent) {
+      root = node; // Tree was empty
+      return node;
+    }
+
+    parent->children[is_right] = node;
+    // Rebalance process: walk upward from the new node until ranks are valid
+    while (parent) {
+      is_right = (parent->children[1] == node);
+      // Case 1: parent does not need to be promoted because node was lower
+      // than the parent by 2 ranks; the difference just shrinks to 1.
+      //      (P)                       (P)
+      //     /  \                      /  \
+      //    2    1           =>       1    1
+      //   /      \                  /      \
+      // (N)       (*)             (N)       (*)
+      if (parent->has_rank_diff_2(is_right)) {
+        parent->toggle_rank_diff_2(is_right);
+        break;
+      }
+
+      bool sibling_has_rank_diff_2 = parent->has_rank_diff_2(!is_right);
+      // Case 2: node's sibling has rank-difference 1.
+      // Promoting parent will fix the conflict of the trinodes but we may need
+      // to continue on parent.
+      //
+      //         (GP)                       (GP)
+      //          |         Promote          |   x - 1
+      //          | x        ----->         (P)
+      //      0   |         /           1  /   \
+      // (N) --- (P)    ----             (N)    \ 2
+      //            \  1                         \
+      //             (S)                          (S)
+      if (!sibling_has_rank_diff_2) {
+        parent->toggle_rank_diff_2(!is_right);
+        node = parent;
+        parent = node->parent;
+        continue;
+      }
+
+      LIBC_ASSERT(!node->both_flags_set() &&
+                  "there should be no 2-2 node along the insertion fixup path");
+
+      LIBC_ASSERT((node == allocated || node->any_flag_set()) &&
+                  "Internal node must have a child with rank-difference 2, "
+                  "otherwise it should have already been handled.");
+
+      // Case 3: node's sibling has rank-difference 2. And node has a 1-node
+      // along the same direction. We can do a single rotation to fix the
+      // trinode.
+      //                   (GP)                            (GP)
+      //               0    |   X      Rotate               |
+      //         (N) ----- (P)           =>                (N)
+      //     1  /   \  2      \  2                      1  /  \ 1
+      //      (C1)   \         \                        (C1)   (P)
+      //             (C2)       (S)                         1 /  \ 1
+      //                                                    (C2)  (S)
+      if (node->has_rank_diff_2(!is_right)) {
+        WeakAVLNode *new_subroot = rotate(root, parent, is_right);
+        new_subroot->clear_flags();
+        parent->clear_flags();
+        break;
+      }
+      // Case 4: node's sibling has rank-difference 2. And node has a 1-node
+      // along the opposite direction. We need a double rotation to fix the
+      // trinode.
+      //                   (GP)                            (GP)
+      //               0    |   X      Zig-Zag              |      X
+      //         (N) ----- (P)           =>                (C1)
+      //     2  /   \  1      \  2                      1  /  \ 1
+      //       /    (C1)       \                        (N)    (P)
+      //     (C2) L /  \ R      (S)                  1 / \ L R / \ 1
+      //          (A)  (B)                           (C2) (A)(B) (S)
+      // (mirrored)
+      //         (GP)                                      (GP)
+      //        X | 0                Zig-Zag                |      X
+      //         (P) ----- (N)           =>                (C1)
+      //    2  /         1 / \ 2                        1  /  \ 1
+      //      /         (C1)  \                         (P)    (N)
+      //    (S)       L /  \ R (C2)                   1 / \ L R / \ 1
+      //              (A)  (B)                        (S)(A)  (B)(C2)
+      WeakAVLNode *subroot1 = rotate(root, node, !is_right); // First rotation
+      [[maybe_unused]] WeakAVLNode *subroot2 =
+          rotate(root, parent, is_right); // Second rotation
+      LIBC_ASSERT(subroot1 == subroot2 &&
+                  "Subroots after double rotation should be the same");
+      bool subroot_left_diff_2 = subroot1->left_rank_diff_2;
+      bool subroot_right_diff_2 = subroot1->right_rank_diff_2;
+      node->clear_flags();
+      parent->clear_flags();
+      subroot1->clear_flags();
+      // Select the nodes that adopted the old subroot's left/right subtrees
+      WeakAVLNode *dst_left = is_right ? parent : node;
+      WeakAVLNode *dst_right = is_right ? node : parent;
+      // Flags are clear here, so a toggle sets rank-difference 2 where needed
+      if (subroot_left_diff_2)
+        dst_left->toggle_rank_diff_2(true);
+
+      if (subroot_right_diff_2)
+        dst_right->toggle_rank_diff_2(false);
+      break;
+    }
+    return allocated;
+  }
+
+  // Erase the node from the tree rooted at root.
+  LIBC_INLINE static void erase(WeakAVLNode *&root, WeakAVLNode *node) {
+    // Unlink the node from the tree
+    auto [cursor, is_right] = unlink(root, node);
+    ::delete node;
+    while (cursor) {
+      // Case 0. cursor previously had rank-difference 1 on the side of the
+      // deleted node. We can simply update the rank-difference and stop.
+      // Notice that this step may create 2-2 nodes, thus deviate from "strong"
+      // AVL tree.
+      //
+      //          (C)                 (C)
+      //       X /   \ 1     =>    X /   \
+      //       (*)   (D)           (*)    \ 2
+      //                                   (D)
+      if (!cursor->has_rank_diff_2(is_right)) {
+        cursor->toggle_rank_diff_2(is_right);
+        // If we created a 2-2 leaf, we must demote it and continue.
+        if (cursor->both_flags_set() && cursor->is_leaf()) {
+          cursor->clear_flags();
+          if (cursor->parent)
+            is_right = (cursor->parent->children[1] == cursor);
+          cursor = cursor->parent;
+          continue;
----------------
michaelrj-google wrote:

Whenever this loop continues, it does so via `cursor = cursor->parent; continue;`. If you change these branches to `else if`, you could put that assignment at the end of the loop and avoid all these `continue`s.

https://github.com/llvm/llvm-project/pull/172411


More information about the libc-commits mailing list