[libc-commits] [libc] [libc] Add POSIX regex stub implementation and build infrastructure (PR #196995)

Jeff Bailey via libc-commits libc-commits at lists.llvm.org
Mon May 18 06:59:13 PDT 2026


https://github.com/kaladron updated https://github.com/llvm/llvm-project/pull/196995

>From 62ac91874cd26223a1cce47f90e960e320293b89 Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Wed, 6 May 2026 16:46:13 +0100
Subject: [PATCH 1/4] [libc] Add POSIX regex stub implementation and build
 infrastructure

Add the four POSIX regex entrypoints (regcomp, regexec, regerror,
regfree) and register them for x86_64, aarch64, riscv, and arm.
regerror is fully implemented with all 13 POSIX error code strings.
The other three are stubs: regcomp stores a copy of the pattern, regexec
does a literal substring search for it, and regfree releases the copy.
They exist to validate the build pipeline end-to-end.

This implementation is restricted to full-build mode (LLVM_LIBC_FULL_BUILD)
to avoid ABI compatibility risks with system headers and internal state
management in overlay mode.

New files:
* include/regex.yaml and regex-macros.h for header generation
* regex_t, regoff_t, regmatch_t type headers
* src/regex/ with all four entrypoints
* test/src/regex/ with regerror and basic round-trip tests

All 7 tests pass.
---
 libc/config/linux/aarch64/entrypoints.txt    |  6 ++
 libc/config/linux/aarch64/headers.txt        |  4 +
 libc/config/linux/arm/entrypoints.txt        |  6 ++
 libc/config/linux/arm/headers.txt            |  4 +
 libc/config/linux/riscv/entrypoints.txt      |  6 ++
 libc/config/linux/riscv/headers.txt          |  4 +
 libc/config/linux/x86_64/entrypoints.txt     |  6 ++
 libc/config/linux/x86_64/headers.txt         |  4 +
 libc/hdr/CMakeLists.txt                      |  9 ++
 libc/hdr/regex_macros.h                      | 18 ++++
 libc/hdr/types/CMakeLists.txt                | 24 ++++++
 libc/hdr/types/regex_t.h                     | 16 ++++
 libc/hdr/types/regmatch_t.h                  | 16 ++++
 libc/hdr/types/regoff_t.h                    | 16 ++++
 libc/include/CMakeLists.txt                  | 12 +++
 libc/include/llvm-libc-macros/CMakeLists.txt |  6 ++
 libc/include/llvm-libc-macros/regex-macros.h | 42 +++++++++
 libc/include/llvm-libc-types/CMakeLists.txt  |  3 +
 libc/include/llvm-libc-types/regex_t.h       | 19 +++++
 libc/include/llvm-libc-types/regmatch_t.h    | 19 +++++
 libc/include/llvm-libc-types/regoff_t.h      | 14 +++
 libc/include/regex.yaml                      | 83 ++++++++++++++++++
 libc/src/CMakeLists.txt                      |  1 +
 libc/src/regex/CMakeLists.txt                | 56 ++++++++++++
 libc/src/regex/regcomp.cpp                   | 51 +++++++++++
 libc/src/regex/regcomp.h                     | 27 ++++++
 libc/src/regex/regerror.cpp                  | 90 ++++++++++++++++++++
 libc/src/regex/regerror.h                    | 29 +++++++
 libc/src/regex/regexec.cpp                   | 54 ++++++++++++
 libc/src/regex/regexec.h                     | 29 +++++++
 libc/src/regex/regfree.cpp                   | 31 +++++++
 libc/src/regex/regfree.h                     | 26 ++++++
 libc/test/src/CMakeLists.txt                 |  1 +
 libc/test/src/regex/CMakeLists.txt           | 25 ++++++
 libc/test/src/regex/regerror_test.cpp        | 48 +++++++++++
 libc/test/src/regex/regex_basic_test.cpp     | 68 +++++++++++++++
 36 files changed, 873 insertions(+)
 create mode 100644 libc/hdr/regex_macros.h
 create mode 100644 libc/hdr/types/regex_t.h
 create mode 100644 libc/hdr/types/regmatch_t.h
 create mode 100644 libc/hdr/types/regoff_t.h
 create mode 100644 libc/include/llvm-libc-macros/regex-macros.h
 create mode 100644 libc/include/llvm-libc-types/regex_t.h
 create mode 100644 libc/include/llvm-libc-types/regmatch_t.h
 create mode 100644 libc/include/llvm-libc-types/regoff_t.h
 create mode 100644 libc/include/regex.yaml
 create mode 100644 libc/src/regex/CMakeLists.txt
 create mode 100644 libc/src/regex/regcomp.cpp
 create mode 100644 libc/src/regex/regcomp.h
 create mode 100644 libc/src/regex/regerror.cpp
 create mode 100644 libc/src/regex/regerror.h
 create mode 100644 libc/src/regex/regexec.cpp
 create mode 100644 libc/src/regex/regexec.h
 create mode 100644 libc/src/regex/regfree.cpp
 create mode 100644 libc/src/regex/regfree.h
 create mode 100644 libc/test/src/regex/CMakeLists.txt
 create mode 100644 libc/test/src/regex/regerror_test.cpp
 create mode 100644 libc/test/src/regex/regex_basic_test.cpp

diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index b7c9cabd934b4..20f029be07a40 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -1193,6 +1193,12 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.search.twalk
     libc.src.search.twalk_r
 
+    # regex.h entrypoints
+    libc.src.regex.regcomp
+    libc.src.regex.regexec
+    libc.src.regex.regerror
+    libc.src.regex.regfree
+
     # threads.h entrypoints
     libc.src.threads.call_once
     libc.src.threads.cnd_broadcast
diff --git a/libc/config/linux/aarch64/headers.txt b/libc/config/linux/aarch64/headers.txt
index 05cd13b1980e7..941a634c48c34 100644
--- a/libc/config/linux/aarch64/headers.txt
+++ b/libc/config/linux/aarch64/headers.txt
@@ -64,3 +64,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.wchar
     libc.include.wctype
 )
+
+if(LLVM_LIBC_FULL_BUILD)
+  list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
+endif()
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 906f36d45e337..8a4730ed04138 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -219,6 +219,12 @@ if(LLVM_LIBC_FULL_BUILD)
     # search.h entrypoints
     libc.src.search.lfind
 
+    # regex.h entrypoints
+    libc.src.regex.regcomp
+    libc.src.regex.regexec
+    libc.src.regex.regerror
+    libc.src.regex.regfree
+
     # setjmp.h entrypoints
     libc.src.setjmp.longjmp
     libc.src.setjmp.setjmp
diff --git a/libc/config/linux/arm/headers.txt b/libc/config/linux/arm/headers.txt
index a9c4de1cf790a..4b7bc079dcc0b 100644
--- a/libc/config/linux/arm/headers.txt
+++ b/libc/config/linux/arm/headers.txt
@@ -25,3 +25,7 @@ set(TARGET_PUBLIC_HEADERS
     # Disabled due to epoll_wait syscalls not being available on this platform.
     # libc.include.sys_epoll
 )
+
+if(LLVM_LIBC_FULL_BUILD)
+  list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
+endif()
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index c0adf2fb116aa..bf410cccac770 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -1316,6 +1316,12 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.search.lsearch
     libc.src.search.remque
 
+    # regex.h entrypoints
+    libc.src.regex.regcomp
+    libc.src.regex.regexec
+    libc.src.regex.regerror
+    libc.src.regex.regfree
+
     # threads.h entrypoints
     libc.src.threads.call_once
     libc.src.threads.cnd_broadcast
diff --git a/libc/config/linux/riscv/headers.txt b/libc/config/linux/riscv/headers.txt
index 218b7f8153309..1c526807b17cd 100644
--- a/libc/config/linux/riscv/headers.txt
+++ b/libc/config/linux/riscv/headers.txt
@@ -63,3 +63,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.wchar
     libc.include.wctype
 )
+
+if(LLVM_LIBC_FULL_BUILD)
+  list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
+endif()
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 9970f079abc08..9d35e52bc9dc3 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1390,6 +1390,12 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.search.twalk
     libc.src.search.twalk_r
 
+    # regex.h entrypoints
+    libc.src.regex.regcomp
+    libc.src.regex.regexec
+    libc.src.regex.regerror
+    libc.src.regex.regfree
+
     # threads.h entrypoints
     libc.src.threads.call_once
     libc.src.threads.cnd_broadcast
diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt
index 8c25137bd9de4..7ccdc1c3c4668 100644
--- a/libc/config/linux/x86_64/headers.txt
+++ b/libc/config/linux/x86_64/headers.txt
@@ -69,3 +69,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.wchar
     libc.include.wctype
 )
+
+if(LLVM_LIBC_FULL_BUILD)
+  list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
+endif()
diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt
index a941d63f7d216..ca58987529699 100644
--- a/libc/hdr/CMakeLists.txt
+++ b/libc/hdr/CMakeLists.txt
@@ -367,5 +367,14 @@ add_gen_header(
   PROXY
 )
 
+add_proxy_header_library(
+  regex_macros
+  HDRS
+    regex_macros.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-macros.regex_macros
+    libc.include.regex
+)
+
 add_subdirectory(types)
 add_subdirectory(func)
diff --git a/libc/hdr/regex_macros.h b/libc/hdr/regex_macros.h
new file mode 100644
index 0000000000000..968cb68394e94
--- /dev/null
+++ b/libc/hdr/regex_macros.h
@@ -0,0 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_REGEX_MACROS_H
+#define LLVM_LIBC_HDR_REGEX_MACROS_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-macros/regex-macros.h"
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_REGEX_MACROS_H
diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt
index a511944547d8a..71e137a7b5c46 100644
--- a/libc/hdr/types/CMakeLists.txt
+++ b/libc/hdr/types/CMakeLists.txt
@@ -986,3 +986,27 @@ add_proxy_header_library(
     libc.include.llvm-libc-types.VISIT
     libc.include.search
 )
+
+add_proxy_header_library(
+  regex_t
+  HDRS
+    regex_t.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-types.regex_t
+)
+
+add_proxy_header_library(
+  regoff_t
+  HDRS
+    regoff_t.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-types.regoff_t
+)
+
+add_proxy_header_library(
+  regmatch_t
+  HDRS
+    regmatch_t.h
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-types.regmatch_t
+)
diff --git a/libc/hdr/types/regex_t.h b/libc/hdr/types/regex_t.h
new file mode 100644
index 0000000000000..f12e440b04668
--- /dev/null
+++ b/libc/hdr/types/regex_t.h
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_REGEX_T_H
+#define LLVM_LIBC_HDR_TYPES_REGEX_T_H
+
+#ifdef LIBC_FULL_BUILD
+#include "include/llvm-libc-types/regex_t.h"
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_REGEX_T_H
diff --git a/libc/hdr/types/regmatch_t.h b/libc/hdr/types/regmatch_t.h
new file mode 100644
index 0000000000000..7a271b067878f
--- /dev/null
+++ b/libc/hdr/types/regmatch_t.h
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_REGMATCH_T_H
+#define LLVM_LIBC_HDR_TYPES_REGMATCH_T_H
+
+#ifdef LIBC_FULL_BUILD
+#include "include/llvm-libc-types/regmatch_t.h"
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_REGMATCH_T_H
diff --git a/libc/hdr/types/regoff_t.h b/libc/hdr/types/regoff_t.h
new file mode 100644
index 0000000000000..77a7e6ef4ca09
--- /dev/null
+++ b/libc/hdr/types/regoff_t.h
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_REGOFF_T_H
+#define LLVM_LIBC_HDR_TYPES_REGOFF_T_H
+
+#ifdef LIBC_FULL_BUILD
+#include "include/llvm-libc-types/regoff_t.h"
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_REGOFF_T_H
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index 90055b41a37cf..454622198e751 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -311,6 +311,18 @@ add_header_macro(
     .llvm_libc_common_h
 )
 
+add_header_macro(
+  regex
+  ../libc/include/regex.yaml
+  regex.h
+  DEPENDS
+    .llvm_libc_common_h
+    .llvm-libc-types.regex_t
+    .llvm-libc-types.regmatch_t
+    .llvm-libc-types.regoff_t
+    .llvm-libc-types.size_t
+)
+
 add_header_macro(
   time
   ../libc/include/time.yaml
diff --git a/libc/include/llvm-libc-macros/CMakeLists.txt b/libc/include/llvm-libc-macros/CMakeLists.txt
index af74d483956a3..3225b5b1849a1 100644
--- a/libc/include/llvm-libc-macros/CMakeLists.txt
+++ b/libc/include/llvm-libc-macros/CMakeLists.txt
@@ -400,6 +400,12 @@ add_macro_header(
     poll-macros.h
 )
 
+add_macro_header(
+  regex_macros
+  HDR
+    regex-macros.h
+)
+
 add_macro_header(
   EFIAPI_macros
   HDR
diff --git a/libc/include/llvm-libc-macros/regex-macros.h b/libc/include/llvm-libc-macros/regex-macros.h
new file mode 100644
index 0000000000000..607b3966a7c83
--- /dev/null
+++ b/libc/include/llvm-libc-macros/regex-macros.h
@@ -0,0 +1,42 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Macros for POSIX regex.h.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_MACROS_REGEX_MACROS_H
+#define LLVM_LIBC_MACROS_REGEX_MACROS_H
+
+// regcomp() cflags: distinct bits, so they can be OR-ed together.
+#define REG_EXTENDED 1
+#define REG_ICASE 2
+#define REG_NOSUB 4
+#define REG_NEWLINE 8
+
+// regexec() eflags: distinct bits, so they can be OR-ed together.
+#define REG_NOTBOL 1
+#define REG_NOTEOL 2
+
+// Error codes, numbered from 1; regerror() reports 0 as "Success".
+#define REG_NOMATCH 1
+#define REG_BADPAT 2
+#define REG_ECOLLATE 3
+#define REG_ECTYPE 4
+#define REG_EESCAPE 5
+#define REG_ESUBREG 6
+#define REG_EBRACK 7
+#define REG_EPAREN 8
+#define REG_EBRACE 9
+#define REG_BADBR 10
+#define REG_ERANGE 11
+#define REG_ESPACE 12
+#define REG_BADRPT 13
+
+#endif // LLVM_LIBC_MACROS_REGEX_MACROS_H
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index 207834072ede9..d6a013d7f4c43 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -208,6 +208,9 @@ add_header(ACTION HDR ACTION.h)
 add_header(ENTRY HDR ENTRY.h)
 add_header(VISIT HDR VISIT.h)
 add_header(struct_hsearch_data HDR struct_hsearch_data.h)
+add_header(regex_t HDR regex_t.h DEPENDS .size_t)
+add_header(regoff_t HDR regoff_t.h)
+add_header(regmatch_t HDR regmatch_t.h DEPENDS .regoff_t)
 add_header(struct_epoll_event HDR struct_epoll_event.h)
 add_header(struct_epoll_data HDR struct_epoll_data.h)
 add_header(
diff --git a/libc/include/llvm-libc-types/regex_t.h b/libc/include/llvm-libc-types/regex_t.h
new file mode 100644
index 0000000000000..1ca0f63908306
--- /dev/null
+++ b/libc/include/llvm-libc-types/regex_t.h
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_REGEX_T_H
+#define LLVM_LIBC_TYPES_REGEX_T_H
+
+#include "size_t.h"
+
+typedef struct {
+  size_t re_nsub;   // Number of parenthesised subexpressions in the pattern.
+  void *__internal; // Implementation-private state; set by regcomp().
+} regex_t;
+
+#endif // LLVM_LIBC_TYPES_REGEX_T_H
diff --git a/libc/include/llvm-libc-types/regmatch_t.h b/libc/include/llvm-libc-types/regmatch_t.h
new file mode 100644
index 0000000000000..5c178380e2569
--- /dev/null
+++ b/libc/include/llvm-libc-types/regmatch_t.h
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_REGMATCH_T_H
+#define LLVM_LIBC_TYPES_REGMATCH_T_H
+
+#include "regoff_t.h"
+
+typedef struct {
+  regoff_t rm_so; // POSIX: byte offset where the match starts.
+  regoff_t rm_eo; // POSIX: byte offset just past the end of the match.
+} regmatch_t;
+
+#endif // LLVM_LIBC_TYPES_REGMATCH_T_H
diff --git a/libc/include/llvm-libc-types/regoff_t.h b/libc/include/llvm-libc-types/regoff_t.h
new file mode 100644
index 0000000000000..3caadf7a0bdd1
--- /dev/null
+++ b/libc/include/llvm-libc-types/regoff_t.h
@@ -0,0 +1,14 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_REGOFF_T_H
+#define LLVM_LIBC_TYPES_REGOFF_T_H
+
+typedef __PTRDIFF_TYPE__ regoff_t; // POSIX: must hold any ptrdiff_t/ssize_t.
+
+#endif // LLVM_LIBC_TYPES_REGOFF_T_H
diff --git a/libc/include/regex.yaml b/libc/include/regex.yaml
new file mode 100644
index 0000000000000..8fe77257f07be
--- /dev/null
+++ b/libc/include/regex.yaml
@@ -0,0 +1,83 @@
+header: regex.h
+standards:
+  - posix
+macros:
+  - macro_name: REG_EXTENDED
+    macro_header: regex-macros.h
+  - macro_name: REG_ICASE
+    macro_header: regex-macros.h
+  - macro_name: REG_NOSUB
+    macro_header: regex-macros.h
+  - macro_name: REG_NEWLINE
+    macro_header: regex-macros.h
+  - macro_name: REG_NOTBOL
+    macro_header: regex-macros.h
+  - macro_name: REG_NOTEOL
+    macro_header: regex-macros.h
+  - macro_name: REG_NOMATCH
+    macro_header: regex-macros.h
+  - macro_name: REG_BADPAT
+    macro_header: regex-macros.h
+  - macro_name: REG_ECOLLATE
+    macro_header: regex-macros.h
+  - macro_name: REG_ECTYPE
+    macro_header: regex-macros.h
+  - macro_name: REG_EESCAPE
+    macro_header: regex-macros.h
+  - macro_name: REG_ESUBREG
+    macro_header: regex-macros.h
+  - macro_name: REG_EBRACK
+    macro_header: regex-macros.h
+  - macro_name: REG_EPAREN
+    macro_header: regex-macros.h
+  - macro_name: REG_EBRACE
+    macro_header: regex-macros.h
+  - macro_name: REG_BADBR
+    macro_header: regex-macros.h
+  - macro_name: REG_ERANGE
+    macro_header: regex-macros.h
+  - macro_name: REG_ESPACE
+    macro_header: regex-macros.h
+  - macro_name: REG_BADRPT
+    macro_header: regex-macros.h
+types:
+  - type_name: regex_t
+  - type_name: regmatch_t
+  - type_name: regoff_t
+  - type_name: size_t
+enums: []
+objects: []
+functions:
+  - name: regcomp
+    standards:
+      - posix
+    return_type: int
+    arguments:
+      - type: regex_t *__restrict
+      - type: const char *__restrict
+      - type: int
+  - name: regexec
+    standards:
+      - posix
+    return_type: int
+    arguments:
+      - type: const regex_t *__restrict
+      - type: const char *__restrict
+      - type: size_t
+      - type: regmatch_t *__restrict
+      - type: int
+  - name: regerror
+    standards:
+      - posix
+    return_type: size_t
+    arguments:
+      - type: int
+      - type: const regex_t *__restrict
+      - type: char *__restrict
+      - type: size_t
+  - name: regfree
+    standards:
+      - posix
+    return_type: void
+    arguments:
+      - type: regex_t *
diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt
index 4ca42ddc4f870..891fc122836cb 100644
--- a/libc/src/CMakeLists.txt
+++ b/libc/src/CMakeLists.txt
@@ -44,6 +44,7 @@ add_subdirectory(compiler)
 add_subdirectory(locale)
 add_subdirectory(nl_types)
 add_subdirectory(search)
+add_subdirectory(regex)
 add_subdirectory(setjmp)
 add_subdirectory(signal)
 add_subdirectory(spawn)
diff --git a/libc/src/regex/CMakeLists.txt b/libc/src/regex/CMakeLists.txt
new file mode 100644
index 0000000000000..4b3f8804c2a72
--- /dev/null
+++ b/libc/src/regex/CMakeLists.txt
@@ -0,0 +1,56 @@
+add_entrypoint_object(
+  regcomp
+  SRCS
+    regcomp.cpp
+  HDRS
+    regcomp.h
+  DEPENDS
+    libc.hdr.types.regex_t
+    libc.hdr.regex_macros
+    libc.src.__support.common
+    libc.src.__support.CPP.new
+    libc.src.string.memory_utils.inline_memcpy
+    libc.src.string.string_utils
+)
+
+add_entrypoint_object(
+  regexec
+  SRCS
+    regexec.cpp
+  HDRS
+    regexec.h
+  DEPENDS
+    libc.hdr.types.regex_t
+    libc.hdr.types.regmatch_t
+    libc.hdr.types.size_t
+    libc.hdr.regex_macros
+    libc.src.__support.common
+    libc.src.string.memory_utils.inline_strstr
+)
+
+add_entrypoint_object(
+  regerror
+  SRCS
+    regerror.cpp
+  HDRS
+    regerror.h
+  DEPENDS
+    libc.hdr.types.regex_t
+    libc.hdr.types.size_t
+    libc.hdr.regex_macros
+    libc.src.__support.common
+    libc.src.string.memory_utils.inline_memcpy
+    libc.src.string.string_utils
+)
+
+add_entrypoint_object(
+  regfree
+  SRCS
+    regfree.cpp
+  HDRS
+    regfree.h
+  DEPENDS
+    libc.hdr.types.regex_t
+    libc.src.__support.common
+    libc.src.__support.CPP.new
+)
diff --git a/libc/src/regex/regcomp.cpp b/libc/src/regex/regcomp.cpp
new file mode 100644
index 0000000000000..49413d13e16b6
--- /dev/null
+++ b/libc/src/regex/regcomp.cpp
@@ -0,0 +1,51 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation of regcomp (stub).
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/regex/regcomp.h"
+
+#include "hdr/regex_macros.h"
+#include "src/__support/CPP/new.h"
+#include "src/__support/alloc-checker.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/string/memory_utils/inline_memcpy.h"
+#include "src/string/string_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, regcomp,
+                   (regex_t *__restrict preg, const char *__restrict pattern,
+                    int cflags)) {
+  // TODO: This is a stub. cflags are ignored and the pattern is stored
+  // verbatim; re_nsub stays 0 because parenthesised subexpressions are not
+  // yet parsed, so REG_NOSUB is effectively always active.
+  (void)cflags;
+
+  // POSIX requires callers to regfree() a preg before reusing it, so whatever
+  // it held before is treated as garbage: overwritten, never freed. Clearing
+  // it before allocating keeps a later regfree() safe if we fail below.
+  preg->re_nsub = 0;
+  preg->__internal = nullptr;
+
+  size_t len = internal::string_length(pattern);
+  AllocChecker ac;
+  char *copy = new (ac) char[len + 1];
+  if (!ac)
+    return REG_ESPACE;
+
+  inline_memcpy(copy, pattern, len + 1); // Includes the terminating NUL.
+  preg->__internal = copy;
+  return 0;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/regex/regcomp.h b/libc/src/regex/regcomp.h
new file mode 100644
index 0000000000000..e42d73b8e4178
--- /dev/null
+++ b/libc/src/regex/regcomp.h
@@ -0,0 +1,27 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation header for regcomp.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_REGEX_REGCOMP_H
+#define LLVM_LIBC_SRC_REGEX_REGCOMP_H
+
+#include "hdr/types/regex_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int regcomp(regex_t *__restrict preg, const char *__restrict pattern,
+            int cflags);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_REGEX_REGCOMP_H
diff --git a/libc/src/regex/regerror.cpp b/libc/src/regex/regerror.cpp
new file mode 100644
index 0000000000000..8efd72376f494
--- /dev/null
+++ b/libc/src/regex/regerror.cpp
@@ -0,0 +1,90 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation of regerror.
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/regex/regerror.h"
+
+#include "hdr/regex_macros.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/string/memory_utils/inline_memcpy.h"
+#include "src/string/string_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(size_t, regerror,
+                   (int errcode, const regex_t *__restrict preg,
+                    char *__restrict errbuf, size_t errbuf_size)) {
+  // Each message is a fixed string selected by errcode alone, so the compiled
+  // expression is never consulted.
+  (void)preg;
+
+  // Translate errcode into its message. Codes outside the known set all map
+  // to the same fallback text.
+  const char *text = [errcode]() -> const char * {
+    switch (errcode) {
+    case 0:
+      return "Success";
+    case REG_NOMATCH:
+      return "No match";
+    case REG_BADPAT:
+      return "Invalid regular expression";
+    case REG_ECOLLATE:
+      return "Invalid collating element";
+    case REG_ECTYPE:
+      return "Invalid character class";
+    case REG_EESCAPE:
+      return "Trailing backslash";
+    case REG_ESUBREG:
+      return "Invalid backreference";
+    case REG_EBRACK:
+      return "Missing ']'";
+    case REG_EPAREN:
+      return "Missing ')'";
+    case REG_EBRACE:
+      return "Missing '}'";
+    case REG_BADBR:
+      return "Invalid repetition count";
+    case REG_ERANGE:
+      return "Invalid range end";
+    case REG_ESPACE:
+      return "Out of memory";
+    case REG_BADRPT:
+      return "Invalid preceding expression";
+    default:
+      return "Unknown error";
+    }
+  }();
+
+  // Bytes required to hold the whole message, terminating NUL included. This
+  // is what gets returned whether or not the message fits in errbuf.
+  const size_t needed = internal::string_length(text) + 1;
+
+  // Without a buffer of at least one byte there is nowhere to write, so only
+  // the required size is reported.
+  if (errbuf_size == 0 || !errbuf)
+    return needed;
+
+  // Store as much of the message as fits, keeping the final byte for the NUL
+  // so the result is always terminated even when truncated.
+  size_t stored = errbuf_size;
+  if (needed < errbuf_size)
+    stored = needed;
+  inline_memcpy(errbuf, text, stored - 1);
+  errbuf[stored - 1] = '\0';
+
+  // POSIX requires returning the size needed to hold the full NUL-terminated
+  // string, even if it was truncated in the buffer.
+  return needed;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/regex/regerror.h b/libc/src/regex/regerror.h
new file mode 100644
index 0000000000000..52ac54ad9c94c
--- /dev/null
+++ b/libc/src/regex/regerror.h
@@ -0,0 +1,29 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation header for regerror.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_REGEX_REGERROR_H
+#define LLVM_LIBC_SRC_REGEX_REGERROR_H
+
+#include "hdr/types/regex_t.h"
+#include "src/__support/macros/config.h"
+
+#include "hdr/types/size_t.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t regerror(int errcode, const regex_t *__restrict preg,
+                char *__restrict errbuf, size_t errbuf_size);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_REGEX_REGERROR_H
diff --git a/libc/src/regex/regexec.cpp b/libc/src/regex/regexec.cpp
new file mode 100644
index 0000000000000..7d3927a2db8ee
--- /dev/null
+++ b/libc/src/regex/regexec.cpp
@@ -0,0 +1,54 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation of regexec (stub).
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/regex/regexec.h"
+
+#include "hdr/regex_macros.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/string/memory_utils/inline_strstr.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, regexec,
+                   (const regex_t *__restrict preg,
+                    const char *__restrict string, size_t nmatch,
+                    regmatch_t *__restrict pmatch, int eflags)) {
+  // TODO: Stub implementation. Still missing:
+  //   - Regex metacharacters (., *, +, ?, [], {}, (), |, ^, $).
+  //   - REG_EXTENDED / REG_ICASE / REG_NEWLINE compile flags.
+  //   - REG_NOTBOL / REG_NOTEOL eflags.
+  //   - pmatch[] filling (subexpression offsets).
+  // Only a literal substring search is done, so these parameters go unused.
+  static_cast<void>(nmatch);
+  static_cast<void>(pmatch);
+  static_cast<void>(eflags);
+
+  // regcomp() stores the pattern text in __internal and regfree() resets it to
+  // null, so a null pointer here means there is nothing to match against.
+  const auto *needle = static_cast<const char *>(preg->__internal);
+  if (needle == nullptr)
+    return REG_NOMATCH;
+
+  // The empty pattern matches every string; otherwise defer to inline_strstr
+  // with a plain byte-difference comparator.
+  if (needle[0] == '\0')
+    return 0;
+
+  auto byte_diff = [](char lhs, char rhs) -> int { return lhs - rhs; };
+  if (!inline_strstr(string, needle, byte_diff))
+    return REG_NOMATCH;
+  return 0;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/regex/regexec.h b/libc/src/regex/regexec.h
new file mode 100644
index 0000000000000..193a1c45c0555
--- /dev/null
+++ b/libc/src/regex/regexec.h
@@ -0,0 +1,29 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation header for regexec.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_REGEX_REGEXEC_H
+#define LLVM_LIBC_SRC_REGEX_REGEXEC_H
+
+#include "hdr/types/regex_t.h"
+#include "hdr/types/regmatch_t.h"
+#include "hdr/types/size_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int regexec(const regex_t *__restrict preg, const char *__restrict string,
+            size_t nmatch, regmatch_t *__restrict pmatch, int eflags);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_REGEX_REGEXEC_H
diff --git a/libc/src/regex/regfree.cpp b/libc/src/regex/regfree.cpp
new file mode 100644
index 0000000000000..06fd791f628ff
--- /dev/null
+++ b/libc/src/regex/regfree.cpp
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation of regfree.
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/regex/regfree.h"
+
+#include "src/__support/CPP/new.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(void, regfree, (regex_t * preg)) {
+  // __internal holds the char array that regexec reads as the pattern.
+  // Deleting a null pointer is a no-op, so an already-freed regex_t is fine.
+  char *compiled = static_cast<char *>(preg->__internal);
+  delete[] compiled;
+  preg->__internal = nullptr;
+  preg->re_nsub = 0;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/regex/regfree.h b/libc/src/regex/regfree.h
new file mode 100644
index 0000000000000..36599387f494d
--- /dev/null
+++ b/libc/src/regex/regfree.h
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation header for regfree.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_REGEX_REGFREE_H
+#define LLVM_LIBC_SRC_REGEX_REGFREE_H
+
+#include "hdr/types/regex_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+void regfree(regex_t *preg);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_REGEX_REGFREE_H
diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt
index b877c7455fc34..6b11d958ded00 100644
--- a/libc/test/src/CMakeLists.txt
+++ b/libc/test/src/CMakeLists.txt
@@ -104,6 +104,7 @@ add_subdirectory(dirent)
 add_subdirectory(locale)
 add_subdirectory(nl_types)
 add_subdirectory(signal)
+add_subdirectory(regex)
 add_subdirectory(spawn)
 
 if(${LIBC_TARGET_OS} STREQUAL "linux")
diff --git a/libc/test/src/regex/CMakeLists.txt b/libc/test/src/regex/CMakeLists.txt
new file mode 100644
index 0000000000000..4807fcc85257e
--- /dev/null
+++ b/libc/test/src/regex/CMakeLists.txt
@@ -0,0 +1,25 @@
+if(LLVM_LIBC_FULL_BUILD)
+  add_custom_target(libc_regex_unittests)
+
+  add_libc_unittest(
+    regerror_test
+    SUITE
+      libc_regex_unittests
+    SRCS
+      regerror_test.cpp
+    DEPENDS
+      libc.src.regex.regerror
+  )
+
+  add_libc_unittest(
+    regex_basic_test
+    SUITE
+      libc_regex_unittests
+    SRCS
+      regex_basic_test.cpp
+    DEPENDS
+      libc.src.regex.regcomp
+      libc.src.regex.regexec
+      libc.src.regex.regfree
+  )
+endif()
diff --git a/libc/test/src/regex/regerror_test.cpp b/libc/test/src/regex/regerror_test.cpp
new file mode 100644
index 0000000000000..28afc87b843ae
--- /dev/null
+++ b/libc/test/src/regex/regerror_test.cpp
@@ -0,0 +1,48 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Unit tests for regerror.
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/regex/regerror.h"
+#include "test/UnitTest/Test.h"
+
+#include "include/llvm-libc-macros/regex-macros.h"
+
+TEST(LlvmLibcRegexTest, RegerrorAllCodes) {
+  char buf[128];
+
+  ASSERT_GT(LIBC_NAMESPACE::regerror(REG_NOMATCH, nullptr, buf, sizeof(buf)),
+            size_t(0));
+  ASSERT_STREQ("No match", buf);
+
+  ASSERT_GT(LIBC_NAMESPACE::regerror(REG_ESPACE, nullptr, buf, sizeof(buf)),
+            size_t(0));
+  ASSERT_STREQ("Out of memory", buf);
+}
+
+TEST(LlvmLibcRegexTest, RegerrorTruncation) {
+  char buf[5];
+  size_t needed =
+      LIBC_NAMESPACE::regerror(REG_NOMATCH, nullptr, buf, sizeof(buf));
+  ASSERT_GT(needed, size_t(5)); // "No match" + NUL = 9 bytes
+  ASSERT_EQ(buf[4], '\0');      // properly NUL-terminated
+}
+
+TEST(LlvmLibcRegexTest, RegerrorZeroBuffer) {
+  size_t needed = LIBC_NAMESPACE::regerror(REG_NOMATCH, nullptr, nullptr, 0);
+  ASSERT_GT(needed, size_t(0));
+}
+
+TEST(LlvmLibcRegexTest, RegerrorSuccess) {
+  char buf[128];
+  LIBC_NAMESPACE::regerror(0, nullptr, buf, sizeof(buf));
+  ASSERT_STREQ("Success", buf);
+}
diff --git a/libc/test/src/regex/regex_basic_test.cpp b/libc/test/src/regex/regex_basic_test.cpp
new file mode 100644
index 0000000000000..287cc7f66686f
--- /dev/null
+++ b/libc/test/src/regex/regex_basic_test.cpp
@@ -0,0 +1,68 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Basic round-trip tests for POSIX regex functions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/regex/regcomp.h"
+#include "src/regex/regexec.h"
+#include "src/regex/regfree.h"
+#include "test/UnitTest/Test.h"
+
+#include "include/llvm-libc-macros/regex-macros.h"
+#include "include/llvm-libc-types/regex_t.h"
+
+TEST(LlvmLibcRegexTest, BasicLiteralRoundTrip) {
+  regex_t preg;
+  ASSERT_EQ(0,
+            LIBC_NAMESPACE::regcomp(&preg, "hello", REG_EXTENDED | REG_NOSUB));
+  ASSERT_EQ(0,
+            LIBC_NAMESPACE::regexec(&preg, "say hello world", 0, nullptr, 0));
+  ASSERT_EQ(REG_NOMATCH,
+            LIBC_NAMESPACE::regexec(&preg, "goodbye", 0, nullptr, 0));
+  LIBC_NAMESPACE::regfree(&preg);
+}
+
+TEST(LlvmLibcRegexTest, MismatchCases) {
+  regex_t preg;
+  // Subject is only a proper prefix of the pattern, so it must not match.
+  ASSERT_EQ(0,
+            LIBC_NAMESPACE::regcomp(&preg, "hello", REG_EXTENDED | REG_NOSUB));
+  ASSERT_EQ(REG_NOMATCH, LIBC_NAMESPACE::regexec(&preg, "hell", 0, nullptr, 0));
+  LIBC_NAMESPACE::regfree(&preg);
+
+  // Matching is case-sensitive: "Hello" must not match "hello".
+  ASSERT_EQ(0,
+            LIBC_NAMESPACE::regcomp(&preg, "Hello", REG_EXTENDED | REG_NOSUB));
+  ASSERT_EQ(REG_NOMATCH,
+            LIBC_NAMESPACE::regexec(&preg, "hello", 0, nullptr, 0));
+  LIBC_NAMESPACE::regfree(&preg);
+
+  // An empty subject cannot contain the non-empty literal pattern.
+  ASSERT_EQ(0, LIBC_NAMESPACE::regcomp(&preg, "a", REG_EXTENDED | REG_NOSUB));
+  ASSERT_EQ(REG_NOMATCH, LIBC_NAMESPACE::regexec(&preg, "", 0, nullptr, 0));
+  LIBC_NAMESPACE::regfree(&preg);
+}
+
+TEST(LlvmLibcRegexTest, EmptyString) {
+  regex_t preg;
+  ASSERT_EQ(0, LIBC_NAMESPACE::regcomp(&preg, "", REG_EXTENDED | REG_NOSUB));
+  ASSERT_EQ(0, LIBC_NAMESPACE::regexec(&preg, "anything", 0, nullptr, 0));
+  ASSERT_EQ(0, LIBC_NAMESPACE::regexec(&preg, "", 0, nullptr, 0));
+  LIBC_NAMESPACE::regfree(&preg);
+}
+
+TEST(LlvmLibcRegexTest, ExactMatch) {
+  regex_t preg;
+  ASSERT_EQ(0,
+            LIBC_NAMESPACE::regcomp(&preg, "test", REG_EXTENDED | REG_NOSUB));
+  ASSERT_EQ(0, LIBC_NAMESPACE::regexec(&preg, "test", 0, nullptr, 0));
+  LIBC_NAMESPACE::regfree(&preg);
+}

>From ed80ffb11de6905f1f028111b359dea53f747196 Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Thu, 14 May 2026 17:17:12 +0100
Subject: [PATCH 2/4] [libc] Address reviewer feedback for regex implementation
 (#196995)

Gated regex entrypoints and headers behind LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS
for Linux and Baremetal on all supported architectures.

Updated regoff_t to use __PTRDIFF_TYPE__ for POSIX compliance and modernised
regcomp and regerror to use cpp::string_view.

Additional changes:

* Fixed circular dependency between regex_macros and regex.h.
* Hardened regerror_test.cpp with proxy headers and precise truncation assertions.
* Updated CMake dependencies and added missing alloc_checker header library to __support.

Assisted-by: Automated tooling, human reviewed.
---
 libc/config/baremetal/aarch64/entrypoints.txt | 10 ++++++++++
 libc/config/baremetal/aarch64/headers.txt     |  4 ++++
 libc/config/baremetal/arm/entrypoints.txt     | 10 ++++++++++
 libc/config/baremetal/arm/headers.txt         |  4 ++++
 libc/config/baremetal/riscv/entrypoints.txt   | 10 ++++++++++
 libc/config/baremetal/riscv/headers.txt       |  4 ++++
 libc/config/linux/aarch64/entrypoints.txt     | 14 +++++++++-----
 libc/config/linux/aarch64/headers.txt         |  2 +-
 libc/config/linux/arm/entrypoints.txt         | 17 ++++++++++++-----
 libc/config/linux/arm/headers.txt             |  2 +-
 libc/config/linux/riscv/entrypoints.txt       | 14 +++++++++-----
 libc/config/linux/riscv/headers.txt           |  2 +-
 libc/config/linux/x86_64/entrypoints.txt      | 14 +++++++++-----
 libc/config/linux/x86_64/headers.txt          |  2 +-
 libc/hdr/CMakeLists.txt                       |  1 -
 libc/include/llvm-libc-types/regoff_t.h       |  7 ++++++-
 libc/src/__support/CMakeLists.txt             |  8 ++++++++
 libc/src/regex/CMakeLists.txt                 |  5 +++--
 libc/src/regex/regcomp.cpp                    |  6 +++---
 libc/src/regex/regerror.cpp                   |  4 ++--
 libc/test/src/regex/CMakeLists.txt            |  5 ++++-
 libc/test/src/regex/regerror_test.cpp         |  7 +++----
 libc/test/src/regex/regex_basic_test.cpp      |  4 ++--
 23 files changed, 116 insertions(+), 40 deletions(-)

diff --git a/libc/config/baremetal/aarch64/entrypoints.txt b/libc/config/baremetal/aarch64/entrypoints.txt
index 452abd985b3a5..dcb50135232e2 100644
--- a/libc/config/baremetal/aarch64/entrypoints.txt
+++ b/libc/config/baremetal/aarch64/entrypoints.txt
@@ -947,6 +947,16 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
   )
 endif()
 
+if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
+  list(APPEND TARGET_LIBC_ENTRYPOINTS
+    # regex.h entrypoints
+    libc.src.regex.regcomp
+    libc.src.regex.regexec
+    libc.src.regex.regerror
+    libc.src.regex.regfree
+  )
+endif()
+
 set(TARGET_LLVMLIBC_ENTRYPOINTS
   ${TARGET_LIBC_ENTRYPOINTS}
   ${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/config/baremetal/aarch64/headers.txt b/libc/config/baremetal/aarch64/headers.txt
index 31cc04d849109..42dcd87b680d6 100644
--- a/libc/config/baremetal/aarch64/headers.txt
+++ b/libc/config/baremetal/aarch64/headers.txt
@@ -25,3 +25,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.uchar
     libc.include.wchar
 )
+
+if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
+  list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
+endif()
diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt
index 41c80efc64227..fac62bac939cc 100644
--- a/libc/config/baremetal/arm/entrypoints.txt
+++ b/libc/config/baremetal/arm/entrypoints.txt
@@ -966,6 +966,16 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
   )
 endif()
 
+if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
+  list(APPEND TARGET_LIBC_ENTRYPOINTS
+    # regex.h entrypoints
+    libc.src.regex.regcomp
+    libc.src.regex.regexec
+    libc.src.regex.regerror
+    libc.src.regex.regfree
+  )
+endif()
+
 set(TARGET_LLVMLIBC_ENTRYPOINTS
   ${TARGET_LIBC_ENTRYPOINTS}
   ${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/config/baremetal/arm/headers.txt b/libc/config/baremetal/arm/headers.txt
index a259c3a4d834b..a69660a97fdae 100644
--- a/libc/config/baremetal/arm/headers.txt
+++ b/libc/config/baremetal/arm/headers.txt
@@ -26,3 +26,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.wchar
     libc.include.wctype
 )
+
+if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
+  list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
+endif()
diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt
index 88eacfae12969..a3b96225ff09d 100644
--- a/libc/config/baremetal/riscv/entrypoints.txt
+++ b/libc/config/baremetal/riscv/entrypoints.txt
@@ -963,6 +963,16 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
   )
 endif()
 
+if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
+  list(APPEND TARGET_LIBC_ENTRYPOINTS
+    # regex.h entrypoints
+    libc.src.regex.regcomp
+    libc.src.regex.regexec
+    libc.src.regex.regerror
+    libc.src.regex.regfree
+  )
+endif()
+
 set(TARGET_LLVMLIBC_ENTRYPOINTS
   ${TARGET_LIBC_ENTRYPOINTS}
   ${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/config/baremetal/riscv/headers.txt b/libc/config/baremetal/riscv/headers.txt
index a259c3a4d834b..a69660a97fdae 100644
--- a/libc/config/baremetal/riscv/headers.txt
+++ b/libc/config/baremetal/riscv/headers.txt
@@ -26,3 +26,7 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.wchar
     libc.include.wctype
 )
+
+if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
+  list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
+endif()
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 20f029be07a40..6cb251ebae047 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -1193,11 +1193,6 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.search.twalk
     libc.src.search.twalk_r
 
-    # regex.h entrypoints
-    libc.src.regex.regcomp
-    libc.src.regex.regexec
-    libc.src.regex.regerror
-    libc.src.regex.regfree
 
     # threads.h entrypoints
     libc.src.threads.call_once
@@ -1277,6 +1272,15 @@ if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
   list(APPEND TARGET_LIBC_ENTRYPOINTS
     libc.src.unistd.sysconf
   )
+  if(LLVM_LIBC_FULL_BUILD)
+    list(APPEND TARGET_LIBC_ENTRYPOINTS
+      # regex.h entrypoints
+      libc.src.regex.regcomp
+      libc.src.regex.regexec
+      libc.src.regex.regerror
+      libc.src.regex.regfree
+    )
+  endif()
 endif()
 
 set(TARGET_LIBMVEC_ENTRYPOINTS)
diff --git a/libc/config/linux/aarch64/headers.txt b/libc/config/linux/aarch64/headers.txt
index 941a634c48c34..e321c5425f662 100644
--- a/libc/config/linux/aarch64/headers.txt
+++ b/libc/config/linux/aarch64/headers.txt
@@ -65,6 +65,6 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.wctype
 )
 
-if(LLVM_LIBC_FULL_BUILD)
+if(LLVM_LIBC_FULL_BUILD AND LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
   list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
 endif()
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 8a4730ed04138..d34609e91bc72 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -219,11 +219,6 @@ if(LLVM_LIBC_FULL_BUILD)
     # search.h entrypoints
     libc.src.search.lfind
 
-    # regex.h entrypoints
-    libc.src.regex.regcomp
-    libc.src.regex.regexec
-    libc.src.regex.regerror
-    libc.src.regex.regfree
 
     # setjmp.h entrypoints
     libc.src.setjmp.longjmp
@@ -563,6 +558,18 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS
   libc.src.math.ufromfpxbf16
 )
 
+if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
+  if(LLVM_LIBC_FULL_BUILD)
+    list(APPEND TARGET_LIBC_ENTRYPOINTS
+      # regex.h entrypoints
+      libc.src.regex.regcomp
+      libc.src.regex.regexec
+      libc.src.regex.regerror
+      libc.src.regex.regfree
+    )
+  endif()
+endif()
+
 set(TARGET_LLVMLIBC_ENTRYPOINTS
   ${TARGET_LIBC_ENTRYPOINTS}
   ${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/config/linux/arm/headers.txt b/libc/config/linux/arm/headers.txt
index 4b7bc079dcc0b..6a0b285944698 100644
--- a/libc/config/linux/arm/headers.txt
+++ b/libc/config/linux/arm/headers.txt
@@ -26,6 +26,6 @@ set(TARGET_PUBLIC_HEADERS
     # libc.include.sys_epoll
 )
 
-if(LLVM_LIBC_FULL_BUILD)
+if(LLVM_LIBC_FULL_BUILD AND LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
   list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
 endif()
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index bf410cccac770..fe9d94372da4f 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -1316,11 +1316,6 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.search.lsearch
     libc.src.search.remque
 
-    # regex.h entrypoints
-    libc.src.regex.regcomp
-    libc.src.regex.regexec
-    libc.src.regex.regerror
-    libc.src.regex.regfree
 
     # threads.h entrypoints
     libc.src.threads.call_once
@@ -1411,6 +1406,15 @@ if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
   list(APPEND TARGET_LIBC_ENTRYPOINTS
     libc.src.unistd.sysconf
   )
+  if(LLVM_LIBC_FULL_BUILD)
+    list(APPEND TARGET_LIBC_ENTRYPOINTS
+      # regex.h entrypoints
+      libc.src.regex.regcomp
+      libc.src.regex.regexec
+      libc.src.regex.regerror
+      libc.src.regex.regfree
+    )
+  endif()
 endif()
 
 set(TARGET_LLVMLIBC_ENTRYPOINTS
diff --git a/libc/config/linux/riscv/headers.txt b/libc/config/linux/riscv/headers.txt
index 1c526807b17cd..d7ae420240280 100644
--- a/libc/config/linux/riscv/headers.txt
+++ b/libc/config/linux/riscv/headers.txt
@@ -64,6 +64,6 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.wctype
 )
 
-if(LLVM_LIBC_FULL_BUILD)
+if(LLVM_LIBC_FULL_BUILD AND LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
   list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
 endif()
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 9d35e52bc9dc3..a7d37882ed4db 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1390,11 +1390,6 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.search.twalk
     libc.src.search.twalk_r
 
-    # regex.h entrypoints
-    libc.src.regex.regcomp
-    libc.src.regex.regexec
-    libc.src.regex.regerror
-    libc.src.regex.regfree
 
     # threads.h entrypoints
     libc.src.threads.call_once
@@ -1501,6 +1496,15 @@ if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
   list(APPEND TARGET_LIBC_ENTRYPOINTS
     libc.src.unistd.sysconf
   )
+  if(LLVM_LIBC_FULL_BUILD)
+    list(APPEND TARGET_LIBC_ENTRYPOINTS
+      # regex.h entrypoints
+      libc.src.regex.regcomp
+      libc.src.regex.regexec
+      libc.src.regex.regerror
+      libc.src.regex.regfree
+    )
+  endif()
 endif()
 
 set(TARGET_LIBMVEC_ENTRYPOINTS)
diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt
index 7ccdc1c3c4668..1aa63784e8aab 100644
--- a/libc/config/linux/x86_64/headers.txt
+++ b/libc/config/linux/x86_64/headers.txt
@@ -70,6 +70,6 @@ set(TARGET_PUBLIC_HEADERS
     libc.include.wctype
 )
 
-if(LLVM_LIBC_FULL_BUILD)
+if(LLVM_LIBC_FULL_BUILD AND LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
   list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
 endif()
diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt
index ca58987529699..f0a79285dfa5c 100644
--- a/libc/hdr/CMakeLists.txt
+++ b/libc/hdr/CMakeLists.txt
@@ -373,7 +373,6 @@ add_proxy_header_library(
     regex_macros.h
   FULL_BUILD_DEPENDS
     libc.include.llvm-libc-macros.regex_macros
-    libc.include.regex
 )
 
 add_subdirectory(types)
diff --git a/libc/include/llvm-libc-types/regoff_t.h b/libc/include/llvm-libc-types/regoff_t.h
index 3caadf7a0bdd1..d16559a56030d 100644
--- a/libc/include/llvm-libc-types/regoff_t.h
+++ b/libc/include/llvm-libc-types/regoff_t.h
@@ -5,10 +5,15 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+///
+/// \file
+/// Definition of the regoff_t type.
+///
+//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIBC_TYPES_REGOFF_T_H
 #define LLVM_LIBC_TYPES_REGOFF_T_H
 
-typedef int regoff_t;
+typedef __PTRDIFF_TYPE__ regoff_t;
 
 #endif // LLVM_LIBC_TYPES_REGOFF_T_H
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 098fb6ef86936..ada489046ef9e 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -144,6 +144,14 @@ add_header_library(
     libc.src.__support.CPP.string_view
 )
 
+add_header_library(
+  alloc_checker
+  HDRS
+    alloc-checker.h
+  DEPENDS
+    libc.src.__support.macros.config
+)
+
 add_header_library(
   ctype_utils
   HDRS
diff --git a/libc/src/regex/CMakeLists.txt b/libc/src/regex/CMakeLists.txt
index 4b3f8804c2a72..3db701645b26a 100644
--- a/libc/src/regex/CMakeLists.txt
+++ b/libc/src/regex/CMakeLists.txt
@@ -9,8 +9,9 @@ add_entrypoint_object(
     libc.hdr.regex_macros
     libc.src.__support.common
     libc.src.__support.CPP.new
+    libc.src.__support.CPP.string_view
+    libc.src.__support.alloc_checker
     libc.src.string.memory_utils.inline_memcpy
-    libc.src.string.string_utils
 )
 
 add_entrypoint_object(
@@ -39,8 +40,8 @@ add_entrypoint_object(
     libc.hdr.types.size_t
     libc.hdr.regex_macros
     libc.src.__support.common
+    libc.src.__support.CPP.string_view
     libc.src.string.memory_utils.inline_memcpy
-    libc.src.string.string_utils
 )
 
 add_entrypoint_object(
diff --git a/libc/src/regex/regcomp.cpp b/libc/src/regex/regcomp.cpp
index 49413d13e16b6..f4c339f70f9d4 100644
--- a/libc/src/regex/regcomp.cpp
+++ b/libc/src/regex/regcomp.cpp
@@ -15,11 +15,10 @@
 
 #include "hdr/regex_macros.h"
 #include "src/__support/CPP/new.h"
+#include "src/__support/CPP/string_view.h"
 #include "src/__support/alloc-checker.h"
-#include "src/__support/common.h"
 #include "src/__support/macros/config.h"
 #include "src/string/memory_utils/inline_memcpy.h"
-#include "src/string/string_utils.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
@@ -33,7 +32,8 @@ LLVM_LIBC_FUNCTION(int, regcomp,
   // object.  We therefore do not attempt to free any previous __internal here
   // — preg is uninitialized on first use and the pointer would be garbage.
 
-  size_t len = internal::string_length(pattern);
+  cpp::string_view pattern_view(pattern);
+  size_t len = pattern_view.size();
   AllocChecker ac;
   char *copy = new (ac) char[len + 1];
   if (!ac)
diff --git a/libc/src/regex/regerror.cpp b/libc/src/regex/regerror.cpp
index 8efd72376f494..87cd5211f9b6b 100644
--- a/libc/src/regex/regerror.cpp
+++ b/libc/src/regex/regerror.cpp
@@ -14,10 +14,10 @@
 #include "src/regex/regerror.h"
 
 #include "hdr/regex_macros.h"
+#include "src/__support/CPP/string_view.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
 #include "src/string/memory_utils/inline_memcpy.h"
-#include "src/string/string_utils.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
@@ -75,7 +75,7 @@ LLVM_LIBC_FUNCTION(size_t, regerror,
     break;
   }
 
-  size_t msg_len = internal::string_length(msg) + 1; // include NUL
+  size_t msg_len = cpp::string_view(msg).size() + 1; // include NUL
 
   if (errbuf_size > 0 && errbuf) {
     size_t copy_len = msg_len < errbuf_size ? msg_len : errbuf_size;
diff --git a/libc/test/src/regex/CMakeLists.txt b/libc/test/src/regex/CMakeLists.txt
index 4807fcc85257e..915b8a5d42638 100644
--- a/libc/test/src/regex/CMakeLists.txt
+++ b/libc/test/src/regex/CMakeLists.txt
@@ -1,4 +1,4 @@
-if(LLVM_LIBC_FULL_BUILD)
+if(LLVM_LIBC_FULL_BUILD AND LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
   add_custom_target(libc_regex_unittests)
 
   add_libc_unittest(
@@ -8,6 +8,7 @@ if(LLVM_LIBC_FULL_BUILD)
     SRCS
       regerror_test.cpp
     DEPENDS
+      libc.hdr.regex_macros
       libc.src.regex.regerror
   )
 
@@ -18,6 +19,8 @@ if(LLVM_LIBC_FULL_BUILD)
     SRCS
       regex_basic_test.cpp
     DEPENDS
+      libc.hdr.regex_macros
+      libc.hdr.types.regex_t
       libc.src.regex.regcomp
       libc.src.regex.regexec
       libc.src.regex.regfree
diff --git a/libc/test/src/regex/regerror_test.cpp b/libc/test/src/regex/regerror_test.cpp
index 28afc87b843ae..99bd2f71f1301 100644
--- a/libc/test/src/regex/regerror_test.cpp
+++ b/libc/test/src/regex/regerror_test.cpp
@@ -14,9 +14,9 @@
 #include "src/regex/regerror.h"
 #include "test/UnitTest/Test.h"
 
-#include "include/llvm-libc-macros/regex-macros.h"
+#include "hdr/regex_macros.h"
 
-TEST(LlvmLibcRegexTest, RegerrorAllCodes) {
+TEST(LlvmLibcRegexTest, RegerrorBasicCodes) {
   char buf[128];
 
   ASSERT_GT(LIBC_NAMESPACE::regerror(REG_NOMATCH, nullptr, buf, sizeof(buf)),
@@ -32,8 +32,7 @@ TEST(LlvmLibcRegexTest, RegerrorTruncation) {
   char buf[5];
   size_t needed =
       LIBC_NAMESPACE::regerror(REG_NOMATCH, nullptr, buf, sizeof(buf));
-  ASSERT_GT(needed, size_t(5)); // "No match" + NUL = 9 bytes
-  ASSERT_EQ(buf[4], '\0');      // properly NUL-terminated
+  ASSERT_STREQ("No m", buf); // "No match" truncated to 5 bytes (including NUL)
 }
 
 TEST(LlvmLibcRegexTest, RegerrorZeroBuffer) {
diff --git a/libc/test/src/regex/regex_basic_test.cpp b/libc/test/src/regex/regex_basic_test.cpp
index 287cc7f66686f..ae5ad42a138e5 100644
--- a/libc/test/src/regex/regex_basic_test.cpp
+++ b/libc/test/src/regex/regex_basic_test.cpp
@@ -16,8 +16,8 @@
 #include "src/regex/regfree.h"
 #include "test/UnitTest/Test.h"
 
-#include "include/llvm-libc-macros/regex-macros.h"
-#include "include/llvm-libc-types/regex_t.h"
+#include "hdr/regex_macros.h"
+#include "hdr/types/regex_t.h"
 
 TEST(LlvmLibcRegexTest, BasicLiteralRoundTrip) {
   regex_t preg;

>From 644b5b6796e0affbb4758c28da37a852c0e471b5 Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Thu, 14 May 2026 17:29:24 +0100
Subject: [PATCH 3/4] [libc][NFC] Remove extra blank lines in entrypoint files
 (#196995)

Clean up extra blank lines in the arm, riscv, and x86_64 entrypoints.txt files.

Assisted-by: Automated tooling, human reviewed.
---
 libc/config/linux/arm/entrypoints.txt    | 1 -
 libc/config/linux/riscv/entrypoints.txt  | 1 -
 libc/config/linux/x86_64/entrypoints.txt | 1 -
 3 files changed, 3 deletions(-)

diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index d34609e91bc72..49b30ef1830f3 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -219,7 +219,6 @@ if(LLVM_LIBC_FULL_BUILD)
     # search.h entrypoints
     libc.src.search.lfind
 
-
     # setjmp.h entrypoints
     libc.src.setjmp.longjmp
     libc.src.setjmp.setjmp
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index fe9d94372da4f..9a4c6de8e092e 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -1316,7 +1316,6 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.search.lsearch
     libc.src.search.remque
 
-
     # threads.h entrypoints
     libc.src.threads.call_once
     libc.src.threads.cnd_broadcast
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index a7d37882ed4db..d1bb130a84fe1 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1390,7 +1390,6 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.search.twalk
     libc.src.search.twalk_r
 
-
     # threads.h entrypoints
     libc.src.threads.call_once
     libc.src.threads.cnd_broadcast

>From 335a37d75f72dcda16afba5d16e261664a8f2904 Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Mon, 18 May 2026 14:55:29 +0100
Subject: [PATCH 4/4] [libc] Address reviewer feedback on regex PR #196995

Addressed feedback from vonosmas and michaelrj-google:

* regerror.cpp: replaced raw const char* + break-heavy switch with
  an immediately-invoked lambda captured into cpp::string_view.
  Uses msg.size() and msg.data() throughout.
* regex.yaml: removed size_t from the types list; hdrgen infers it
  from function signatures automatically.
* hdr/types/regex_t.h: added #else #error branch to match the
  locale_t pattern, since regex_t contains internal state that is
  incompatible with any system header. Fixed #endif comment to use
  the preprocessor macro name LIBC_FULL_BUILD, not the CMake
  variable LLVM_LIBC_FULL_BUILD.
* hdr/types/regoff_t.h, hdr/types/regmatch_t.h,
  hdr/types/regex_t.h, hdr/regex_macros.h,
  include/llvm-libc-types/regex_t.h,
  include/llvm-libc-types/regmatch_t.h: added missing \file
  Doxygen blocks.
* config/linux/aarch64/entrypoints.txt: removed extra blank line
  between search.h and threads.h sections.
* test/src/regex/regerror_test.cpp: asserted that needed equals 9
  (strlen("No match") + 1) in the truncation test.
* test/src/regex/regex_basic_test.cpp: added NullByteStopsParsing
  test verifying regexec stops at an embedded NUL in the subject.

Assisted-by: Automated tooling, human reviewed.
---
 libc/config/linux/aarch64/entrypoints.txt |  1 -
 libc/hdr/regex_macros.h                   |  5 ++
 libc/hdr/types/regex_t.h                  | 11 +++
 libc/hdr/types/regmatch_t.h               |  5 ++
 libc/hdr/types/regoff_t.h                 |  5 ++
 libc/include/llvm-libc-types/regex_t.h    |  5 ++
 libc/include/llvm-libc-types/regmatch_t.h |  5 ++
 libc/include/regex.yaml                   |  1 -
 libc/src/regex/regerror.cpp               | 86 ++++++++++-------------
 libc/test/src/regex/regerror_test.cpp     |  1 +
 libc/test/src/regex/regex_basic_test.cpp  |  9 +++
 11 files changed, 82 insertions(+), 52 deletions(-)

diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 6cb251ebae047..e1fd41779c898 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -1193,7 +1193,6 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.search.twalk
     libc.src.search.twalk_r
 
-
     # threads.h entrypoints
     libc.src.threads.call_once
     libc.src.threads.cnd_broadcast
diff --git a/libc/hdr/regex_macros.h b/libc/hdr/regex_macros.h
index 968cb68394e94..74b5d4be20ff5 100644
--- a/libc/hdr/regex_macros.h
+++ b/libc/hdr/regex_macros.h
@@ -5,6 +5,11 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+///
+/// \file
+/// Proxy header for regex macros.
+///
+//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIBC_HDR_REGEX_MACROS_H
 #define LLVM_LIBC_HDR_REGEX_MACROS_H
diff --git a/libc/hdr/types/regex_t.h b/libc/hdr/types/regex_t.h
index f12e440b04668..fccdce8073677 100644
--- a/libc/hdr/types/regex_t.h
+++ b/libc/hdr/types/regex_t.h
@@ -5,12 +5,23 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+///
+/// \file
+/// Proxy header for regex_t.
+///
+//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIBC_HDR_TYPES_REGEX_T_H
 #define LLVM_LIBC_HDR_TYPES_REGEX_T_H
 
 #ifdef LIBC_FULL_BUILD
+
 #include "include/llvm-libc-types/regex_t.h"
+
+#else // overlay mode
+
+#error "type not available in overlay mode"
+
 #endif // LIBC_FULL_BUILD
 
 #endif // LLVM_LIBC_HDR_TYPES_REGEX_T_H
diff --git a/libc/hdr/types/regmatch_t.h b/libc/hdr/types/regmatch_t.h
index 7a271b067878f..b26129ac7d562 100644
--- a/libc/hdr/types/regmatch_t.h
+++ b/libc/hdr/types/regmatch_t.h
@@ -5,6 +5,11 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+///
+/// \file
+/// Proxy header for regmatch_t.
+///
+//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIBC_HDR_TYPES_REGMATCH_T_H
 #define LLVM_LIBC_HDR_TYPES_REGMATCH_T_H
diff --git a/libc/hdr/types/regoff_t.h b/libc/hdr/types/regoff_t.h
index 77a7e6ef4ca09..dce512f86778f 100644
--- a/libc/hdr/types/regoff_t.h
+++ b/libc/hdr/types/regoff_t.h
@@ -5,6 +5,11 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+///
+/// \file
+/// Proxy header for regoff_t.
+///
+//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIBC_HDR_TYPES_REGOFF_T_H
 #define LLVM_LIBC_HDR_TYPES_REGOFF_T_H
diff --git a/libc/include/llvm-libc-types/regex_t.h b/libc/include/llvm-libc-types/regex_t.h
index 1ca0f63908306..6f47c8150056f 100644
--- a/libc/include/llvm-libc-types/regex_t.h
+++ b/libc/include/llvm-libc-types/regex_t.h
@@ -5,6 +5,11 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+///
+/// \file
+/// Type definition for regex_t.
+///
+//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIBC_TYPES_REGEX_T_H
 #define LLVM_LIBC_TYPES_REGEX_T_H
diff --git a/libc/include/llvm-libc-types/regmatch_t.h b/libc/include/llvm-libc-types/regmatch_t.h
index 5c178380e2569..0adaa24229b6f 100644
--- a/libc/include/llvm-libc-types/regmatch_t.h
+++ b/libc/include/llvm-libc-types/regmatch_t.h
@@ -5,6 +5,11 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+///
+/// \file
+/// Type definition for regmatch_t.
+///
+//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_LIBC_TYPES_REGMATCH_T_H
 #define LLVM_LIBC_TYPES_REGMATCH_T_H
diff --git a/libc/include/regex.yaml b/libc/include/regex.yaml
index 8fe77257f07be..baa7e39c9dd10 100644
--- a/libc/include/regex.yaml
+++ b/libc/include/regex.yaml
@@ -44,7 +44,6 @@ types:
   - type_name: regex_t
   - type_name: regmatch_t
   - type_name: regoff_t
-  - type_name: size_t
 enums: []
 objects: []
 functions:
diff --git a/libc/src/regex/regerror.cpp b/libc/src/regex/regerror.cpp
index 87cd5211f9b6b..cc42d476687f4 100644
--- a/libc/src/regex/regerror.cpp
+++ b/libc/src/regex/regerror.cpp
@@ -26,60 +26,46 @@ LLVM_LIBC_FUNCTION(size_t, regerror,
                     char *__restrict errbuf, size_t errbuf_size)) {
   (void)preg; // preg is reserved for implementation-specific messages.
 
-  const char *msg;
-  switch (errcode) {
-  case 0:
-    msg = "Success";
-    break;
-  case REG_NOMATCH:
-    msg = "No match";
-    break;
-  case REG_BADPAT:
-    msg = "Invalid regular expression";
-    break;
-  case REG_ECOLLATE:
-    msg = "Invalid collating element";
-    break;
-  case REG_ECTYPE:
-    msg = "Invalid character class";
-    break;
-  case REG_EESCAPE:
-    msg = "Trailing backslash";
-    break;
-  case REG_ESUBREG:
-    msg = "Invalid backreference";
-    break;
-  case REG_EBRACK:
-    msg = "Missing ']'";
-    break;
-  case REG_EPAREN:
-    msg = "Missing ')'";
-    break;
-  case REG_EBRACE:
-    msg = "Missing '}'";
-    break;
-  case REG_BADBR:
-    msg = "Invalid repetition count";
-    break;
-  case REG_ERANGE:
-    msg = "Invalid range end";
-    break;
-  case REG_ESPACE:
-    msg = "Out of memory";
-    break;
-  case REG_BADRPT:
-    msg = "Invalid preceding expression";
-    break;
-  default:
-    msg = "Unknown error";
-    break;
-  }
+  cpp::string_view msg = [errcode]() -> const char * {
+    switch (errcode) {
+    case 0:
+      return "Success";
+    case REG_NOMATCH:
+      return "No match";
+    case REG_BADPAT:
+      return "Invalid regular expression";
+    case REG_ECOLLATE:
+      return "Invalid collating element";
+    case REG_ECTYPE:
+      return "Invalid character class";
+    case REG_EESCAPE:
+      return "Trailing backslash";
+    case REG_ESUBREG:
+      return "Invalid backreference";
+    case REG_EBRACK:
+      return "Missing ']'";
+    case REG_EPAREN:
+      return "Missing ')'";
+    case REG_EBRACE:
+      return "Missing '}'";
+    case REG_BADBR:
+      return "Invalid repetition count";
+    case REG_ERANGE:
+      return "Invalid range end";
+    case REG_ESPACE:
+      return "Out of memory";
+    case REG_BADRPT:
+      return "Invalid preceding expression";
+    default:
+      return "Unknown error";
+    }
+  }();
 
-  size_t msg_len = cpp::string_view(msg).size() + 1; // include NUL
+  size_t msg_len = msg.size() + 1; // include NUL
 
   if (errbuf_size > 0 && errbuf) {
     size_t copy_len = msg_len < errbuf_size ? msg_len : errbuf_size;
-    inline_memcpy(errbuf, msg, copy_len - 1);
+    inline_memcpy(errbuf, msg.data(), copy_len - 1);
     errbuf[copy_len - 1] = '\0';
   }
   // POSIX requires returning the size needed to hold the full NUL-terminated
diff --git a/libc/test/src/regex/regerror_test.cpp b/libc/test/src/regex/regerror_test.cpp
index 99bd2f71f1301..d7010aa0b9fc8 100644
--- a/libc/test/src/regex/regerror_test.cpp
+++ b/libc/test/src/regex/regerror_test.cpp
@@ -32,6 +32,7 @@ TEST(LlvmLibcRegexTest, RegerrorTruncation) {
   char buf[5];
   size_t needed =
       LIBC_NAMESPACE::regerror(REG_NOMATCH, nullptr, buf, sizeof(buf));
+  ASSERT_EQ(needed, size_t(9)); // strlen("No match") + 1
   ASSERT_STREQ("No m", buf); // "No match" truncated to 5 bytes (including NUL)
 }
 
diff --git a/libc/test/src/regex/regex_basic_test.cpp b/libc/test/src/regex/regex_basic_test.cpp
index ae5ad42a138e5..fc598227665cc 100644
--- a/libc/test/src/regex/regex_basic_test.cpp
+++ b/libc/test/src/regex/regex_basic_test.cpp
@@ -66,3 +66,12 @@ TEST(LlvmLibcRegexTest, ExactMatch) {
   ASSERT_EQ(0, LIBC_NAMESPACE::regexec(&preg, "test", 0, nullptr, 0));
   LIBC_NAMESPACE::regfree(&preg);
 }
+
+TEST(LlvmLibcRegexTest, NullByteStopsParsing) { // regexec stops at first NUL
+  regex_t preg;
+  ASSERT_EQ(0,
+            LIBC_NAMESPACE::regcomp(&preg, "match", REG_EXTENDED | REG_NOSUB));
+  ASSERT_EQ(REG_NOMATCH, // "match" occurs only after the embedded NUL
+            LIBC_NAMESPACE::regexec(&preg, "doesn't \0 match", 0, nullptr, 0));
+  LIBC_NAMESPACE::regfree(&preg);
+}



More information about the libc-commits mailing list