[libc-commits] [libc] [libc] Add regex AST and ExprPool (PR #198728)
Alexey Samsonov via libc-commits
libc-commits at lists.llvm.org
Thu Jun 18 22:22:35 PDT 2026
================
@@ -0,0 +1,118 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Pool for Regular Expression AST nodes (Class Definitions).
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_REGEX_REGEX_EXPR_POOL_H
+#define LLVM_LIBC_SRC___SUPPORT_REGEX_REGEX_EXPR_POOL_H
+
+#include "src/__support/CPP/expected.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/regex/regex_ast.h"
+#include <stddef.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+/// An arena-based pool for Regular Expression AST nodes.
+///
+/// This class manages the allocation and hash-consing of Expr nodes. All
+/// nodes created through this pool are owned by it and will be freed when
+/// the pool is destroyed. Hash-consing ensures that identical expressions
+/// are represented by the same pointer, enabling fast comparison and
+/// derivative normalization.
+class ExprPool {
+ /// Internal storage block for AST nodes.
+ ///
+ /// Blocks are allocated on demand to avoid large contiguous allocations
+ /// and are linked together in a list for cleanup.
+ /// TODO: Consider adopting cpp::forward_list for block management once
+ /// it is available in LLVM-libc.
+ struct Block {
+ /// Capacity of a block: the number of Expr slots in the `nodes` array.
+ static constexpr size_t BLOCK_SIZE = 256;
+ /// Inline storage for the Expr nodes held by this block.
+ Expr nodes[BLOCK_SIZE];
+ /// Pointer to the next block in the chain; nullptr by default.
+ Block *next = nullptr;
+ /// Number of nodes currently used in this block; starts at 0.
+ size_t used = 0;
+
+ /// Initialises an empty block (only declared here; defined elsewhere).
+ Block();
+ };
+
+ /// The first block in the allocation chain.
+ Block *head = nullptr;
+ /// The block currently being used for new node allocations.
+ Block *current = nullptr;
+ /// Total number of nodes allocated across all blocks.
+ size_t node_count = 0;
+
+ /// The size of the hash table used for hash-consing (interning) expression
+ /// nodes. The value 0x4000 (16,384) is the smallest power of two that keeps
+ /// the load factor below 70% when the pool reaches its limit of 10,000 nodes
+ /// (peak load factor is ~61%). Using a power of two allows the compiler to
+ /// optimize the modulo indexing into an efficient bitwise AND, while the low
+ /// load factor minimizes collisions and guarantees O(1) average interning
+ /// time.
+ static constexpr size_t HASH_TABLE_SIZE = 0x4000;
+ /// Hash table storing pointers to unique Expr nodes.
+ Expr **hashtable = nullptr;
+
+ /// Core hash-consing function (Interning).
+ ///
+ /// Guarantees that for any two identical structural definitions of an Expr,
+ /// this function will return the same pointer. This enables O(1) structural
+ /// equality via pointer comparison.
+ ///
+ /// \param e A structural definition (proto-node) to intern.
+ /// \returns A pointer to the unique, stable instance in the arena,
+ /// or REG_ESPACE on failure.
+ cpp::expected<Expr *, int> intern(const Expr &e);
+
+ /// The maximum number of nodes allowed in the pool to prevent memory
----------------
vonosmas wrote:
Nit: move this next to `HASH_TABLE_SIZE` above and deduplicate the comments.
https://github.com/llvm/llvm-project/pull/198728
More information about the libc-commits
mailing list