[libc-commits] [libc] [libc] Add regex AST and ExprPool (PR #198728)

Alexey Samsonov via libc-commits libc-commits at lists.llvm.org
Thu Jun 18 22:22:34 PDT 2026


================
@@ -0,0 +1,82 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// AST nodes for Regular Expressions (Class Definitions).
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_REGEX_REGEX_AST_H
+#define LLVM_LIBC_SRC___SUPPORT_REGEX_REGEX_AST_H
+
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+/// Enumeration of Regular Expression AST node types.
+enum class ExprKind {
+  /// The empty set: an expression that matches nothing.
+  EmptySet,
+  /// The empty string: an expression that matches the zero-length string.
+  EmptyStr,
+  /// A literal character match.
+  Literal,
+  /// Concatenation of two expressions (left followed by right).
+  Concat,
+  /// Alternation between two expressions (left or right).
+  Alt,
+};
+
+/// A node in the Regular Expression Abstract Syntax Tree.
+///
+/// Expressions are represented as a hash-consed DAG to enable efficient
+/// derivative-based matching. This structure is intended to be managed by
+/// an ExprPool.
+struct Expr {
+  /// The type of this expression node.
+  ExprKind kind;
+  union {
+    /// Character value for Literal nodes.
+    char ch;
+    /// Sub-expressions for Concat and Alt nodes.
+    struct {
+      Expr *left;
+      Expr *right;
+    } bin;
+  };
+
+  /// Default constructor creates an EmptySet node.
+  constexpr Expr() : kind(ExprKind::EmptySet), ch('\0') {}
+  /// Create a node of a specific kind with no data.
+  constexpr Expr(ExprKind k) : kind(k), ch('\0') {}
+  /// Create a Literal node.
+  constexpr Expr(char c) : kind(ExprKind::Literal), ch(c) {}
+  /// Create a binary node (Concat or Alt).
+  constexpr Expr(ExprKind k, Expr *l, Expr *r) : kind(k), bin{l, r} {}
----------------
vonosmas wrote:

WDYT about making the constructor(s) private and exposing public factory methods instead? Then you can enforce invariants in the API, and not have to worry about clients creating an `Expr` node with two subnodes and kind `EmptySet`, etc.

```
static constexpr Expr CreateEmptySet() { return Expr(ExprKind::EmptySet, '\0'); }
static constexpr Expr CreateLiteral(char c) { return Expr(ExprKind::Literal, c); }
static constexpr Expr CreateConcat(Expr *l, Expr * r) { return Expr(ExprKind::Concat, l, r); }
```

etc.


https://github.com/llvm/llvm-project/pull/198728


More information about the libc-commits mailing list