[libc-commits] [libc] [libc] Add POSIX regex stub implementation and build infrastructure (PR #196995)
Jeff Bailey via libc-commits
libc-commits at lists.llvm.org
Mon May 18 06:59:13 PDT 2026
https://github.com/kaladron updated https://github.com/llvm/llvm-project/pull/196995
>From 62ac91874cd26223a1cce47f90e960e320293b89 Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Wed, 6 May 2026 16:46:13 +0100
Subject: [PATCH 1/4] [libc] Add POSIX regex stub implementation and build
infrastructure
Added the four POSIX regex entrypoints (regcomp, regexec, regerror,
regfree) and registered them for x86_64, aarch64, riscv, and arm.
regerror is fully implemented with all 13 POSIX error code strings.
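regcomp, regexec, and regfree are stubs: regcomp stores a copy of the
pattern, regexec does a literal substring search for it, and regfree
releases the copy. Together they validate the build pipeline end-to-end.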
This implementation is restricted to full-build mode (LLVM_LIBC_FULL_BUILD)
to avoid ABI compatibility risks with system headers and internal state
management in overlay mode.
New files:
* include/regex.yaml and regex-macros.h for header generation
* regex_t, regoff_t, regmatch_t type headers
* src/regex/ with all four entrypoints
* test/src/regex/ with regerror and basic round-trip tests
All 7 tests pass.
---
libc/config/linux/aarch64/entrypoints.txt | 6 ++
libc/config/linux/aarch64/headers.txt | 4 +
libc/config/linux/arm/entrypoints.txt | 6 ++
libc/config/linux/arm/headers.txt | 4 +
libc/config/linux/riscv/entrypoints.txt | 6 ++
libc/config/linux/riscv/headers.txt | 4 +
libc/config/linux/x86_64/entrypoints.txt | 6 ++
libc/config/linux/x86_64/headers.txt | 4 +
libc/hdr/CMakeLists.txt | 9 ++
libc/hdr/regex_macros.h | 18 ++++
libc/hdr/types/CMakeLists.txt | 24 ++++++
libc/hdr/types/regex_t.h | 16 ++++
libc/hdr/types/regmatch_t.h | 16 ++++
libc/hdr/types/regoff_t.h | 16 ++++
libc/include/CMakeLists.txt | 12 +++
libc/include/llvm-libc-macros/CMakeLists.txt | 6 ++
libc/include/llvm-libc-macros/regex-macros.h | 42 +++++++++
libc/include/llvm-libc-types/CMakeLists.txt | 3 +
libc/include/llvm-libc-types/regex_t.h | 19 +++++
libc/include/llvm-libc-types/regmatch_t.h | 19 +++++
libc/include/llvm-libc-types/regoff_t.h | 14 +++
libc/include/regex.yaml | 83 ++++++++++++++++++
libc/src/CMakeLists.txt | 1 +
libc/src/regex/CMakeLists.txt | 56 ++++++++++++
libc/src/regex/regcomp.cpp | 51 +++++++++++
libc/src/regex/regcomp.h | 27 ++++++
libc/src/regex/regerror.cpp | 90 ++++++++++++++++++++
libc/src/regex/regerror.h | 29 +++++++
libc/src/regex/regexec.cpp | 54 ++++++++++++
libc/src/regex/regexec.h | 29 +++++++
libc/src/regex/regfree.cpp | 31 +++++++
libc/src/regex/regfree.h | 26 ++++++
libc/test/src/CMakeLists.txt | 1 +
libc/test/src/regex/CMakeLists.txt | 25 ++++++
libc/test/src/regex/regerror_test.cpp | 48 +++++++++++
libc/test/src/regex/regex_basic_test.cpp | 68 +++++++++++++++
36 files changed, 873 insertions(+)
create mode 100644 libc/hdr/regex_macros.h
create mode 100644 libc/hdr/types/regex_t.h
create mode 100644 libc/hdr/types/regmatch_t.h
create mode 100644 libc/hdr/types/regoff_t.h
create mode 100644 libc/include/llvm-libc-macros/regex-macros.h
create mode 100644 libc/include/llvm-libc-types/regex_t.h
create mode 100644 libc/include/llvm-libc-types/regmatch_t.h
create mode 100644 libc/include/llvm-libc-types/regoff_t.h
create mode 100644 libc/include/regex.yaml
create mode 100644 libc/src/regex/CMakeLists.txt
create mode 100644 libc/src/regex/regcomp.cpp
create mode 100644 libc/src/regex/regcomp.h
create mode 100644 libc/src/regex/regerror.cpp
create mode 100644 libc/src/regex/regerror.h
create mode 100644 libc/src/regex/regexec.cpp
create mode 100644 libc/src/regex/regexec.h
create mode 100644 libc/src/regex/regfree.cpp
create mode 100644 libc/src/regex/regfree.h
create mode 100644 libc/test/src/regex/CMakeLists.txt
create mode 100644 libc/test/src/regex/regerror_test.cpp
create mode 100644 libc/test/src/regex/regex_basic_test.cpp
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index b7c9cabd934b4..20f029be07a40 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -1193,6 +1193,12 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.search.twalk
libc.src.search.twalk_r
+ # regex.h entrypoints
+ libc.src.regex.regcomp
+ libc.src.regex.regexec
+ libc.src.regex.regerror
+ libc.src.regex.regfree
+
# threads.h entrypoints
libc.src.threads.call_once
libc.src.threads.cnd_broadcast
diff --git a/libc/config/linux/aarch64/headers.txt b/libc/config/linux/aarch64/headers.txt
index 05cd13b1980e7..941a634c48c34 100644
--- a/libc/config/linux/aarch64/headers.txt
+++ b/libc/config/linux/aarch64/headers.txt
@@ -64,3 +64,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.wchar
libc.include.wctype
)
+
+if(LLVM_LIBC_FULL_BUILD)
+ list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
+endif()
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 906f36d45e337..8a4730ed04138 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -219,6 +219,12 @@ if(LLVM_LIBC_FULL_BUILD)
# search.h entrypoints
libc.src.search.lfind
+ # regex.h entrypoints
+ libc.src.regex.regcomp
+ libc.src.regex.regexec
+ libc.src.regex.regerror
+ libc.src.regex.regfree
+
# setjmp.h entrypoints
libc.src.setjmp.longjmp
libc.src.setjmp.setjmp
diff --git a/libc/config/linux/arm/headers.txt b/libc/config/linux/arm/headers.txt
index a9c4de1cf790a..4b7bc079dcc0b 100644
--- a/libc/config/linux/arm/headers.txt
+++ b/libc/config/linux/arm/headers.txt
@@ -25,3 +25,7 @@ set(TARGET_PUBLIC_HEADERS
# Disabled due to epoll_wait syscalls not being available on this platform.
# libc.include.sys_epoll
)
+
+if(LLVM_LIBC_FULL_BUILD)
+ list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
+endif()
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index c0adf2fb116aa..bf410cccac770 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -1316,6 +1316,12 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.search.lsearch
libc.src.search.remque
+ # regex.h entrypoints
+ libc.src.regex.regcomp
+ libc.src.regex.regexec
+ libc.src.regex.regerror
+ libc.src.regex.regfree
+
# threads.h entrypoints
libc.src.threads.call_once
libc.src.threads.cnd_broadcast
diff --git a/libc/config/linux/riscv/headers.txt b/libc/config/linux/riscv/headers.txt
index 218b7f8153309..1c526807b17cd 100644
--- a/libc/config/linux/riscv/headers.txt
+++ b/libc/config/linux/riscv/headers.txt
@@ -63,3 +63,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.wchar
libc.include.wctype
)
+
+if(LLVM_LIBC_FULL_BUILD)
+ list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
+endif()
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 9970f079abc08..9d35e52bc9dc3 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1390,6 +1390,12 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.search.twalk
libc.src.search.twalk_r
+ # regex.h entrypoints
+ libc.src.regex.regcomp
+ libc.src.regex.regexec
+ libc.src.regex.regerror
+ libc.src.regex.regfree
+
# threads.h entrypoints
libc.src.threads.call_once
libc.src.threads.cnd_broadcast
diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt
index 8c25137bd9de4..7ccdc1c3c4668 100644
--- a/libc/config/linux/x86_64/headers.txt
+++ b/libc/config/linux/x86_64/headers.txt
@@ -69,3 +69,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.wchar
libc.include.wctype
)
+
+if(LLVM_LIBC_FULL_BUILD)
+ list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
+endif()
diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt
index a941d63f7d216..ca58987529699 100644
--- a/libc/hdr/CMakeLists.txt
+++ b/libc/hdr/CMakeLists.txt
@@ -367,5 +367,14 @@ add_gen_header(
PROXY
)
+add_proxy_header_library(
+ regex_macros
+ HDRS
+ regex_macros.h
+ FULL_BUILD_DEPENDS
+ libc.include.llvm-libc-macros.regex_macros
+ libc.include.regex
+)
+
add_subdirectory(types)
add_subdirectory(func)
diff --git a/libc/hdr/regex_macros.h b/libc/hdr/regex_macros.h
new file mode 100644
index 0000000000000..968cb68394e94
--- /dev/null
+++ b/libc/hdr/regex_macros.h
@@ -0,0 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_REGEX_MACROS_H
+#define LLVM_LIBC_HDR_REGEX_MACROS_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-macros/regex-macros.h"
+
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_REGEX_MACROS_H
diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt
index a511944547d8a..71e137a7b5c46 100644
--- a/libc/hdr/types/CMakeLists.txt
+++ b/libc/hdr/types/CMakeLists.txt
@@ -986,3 +986,27 @@ add_proxy_header_library(
libc.include.llvm-libc-types.VISIT
libc.include.search
)
+
+add_proxy_header_library(
+ regex_t
+ HDRS
+ regex_t.h
+ FULL_BUILD_DEPENDS
+ libc.include.llvm-libc-types.regex_t
+)
+
+add_proxy_header_library(
+ regoff_t
+ HDRS
+ regoff_t.h
+ FULL_BUILD_DEPENDS
+ libc.include.llvm-libc-types.regoff_t
+)
+
+add_proxy_header_library(
+ regmatch_t
+ HDRS
+ regmatch_t.h
+ FULL_BUILD_DEPENDS
+ libc.include.llvm-libc-types.regmatch_t
+)
diff --git a/libc/hdr/types/regex_t.h b/libc/hdr/types/regex_t.h
new file mode 100644
index 0000000000000..f12e440b04668
--- /dev/null
+++ b/libc/hdr/types/regex_t.h
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_REGEX_T_H
+#define LLVM_LIBC_HDR_TYPES_REGEX_T_H
+
+#ifdef LIBC_FULL_BUILD
+#include "include/llvm-libc-types/regex_t.h"
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_REGEX_T_H
diff --git a/libc/hdr/types/regmatch_t.h b/libc/hdr/types/regmatch_t.h
new file mode 100644
index 0000000000000..7a271b067878f
--- /dev/null
+++ b/libc/hdr/types/regmatch_t.h
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_REGMATCH_T_H
+#define LLVM_LIBC_HDR_TYPES_REGMATCH_T_H
+
+#ifdef LIBC_FULL_BUILD
+#include "include/llvm-libc-types/regmatch_t.h"
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_REGMATCH_T_H
diff --git a/libc/hdr/types/regoff_t.h b/libc/hdr/types/regoff_t.h
new file mode 100644
index 0000000000000..77a7e6ef4ca09
--- /dev/null
+++ b/libc/hdr/types/regoff_t.h
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_TYPES_REGOFF_T_H
+#define LLVM_LIBC_HDR_TYPES_REGOFF_T_H
+
+#ifdef LIBC_FULL_BUILD
+#include "include/llvm-libc-types/regoff_t.h"
+#endif // LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_TYPES_REGOFF_T_H
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index 90055b41a37cf..454622198e751 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -311,6 +311,19 @@ add_header_macro(
.llvm_libc_common_h
)
+add_header_macro(
+ regex
+ ../libc/include/regex.yaml
+ regex.h
+ DEPENDS
+ .llvm_libc_common_h
+ .llvm-libc-macros.regex_macros # regex.yaml takes every REG_* from it.
+ .llvm-libc-types.regex_t
+ .llvm-libc-types.regmatch_t
+ .llvm-libc-types.regoff_t
+ .llvm-libc-types.size_t
+)
+
add_header_macro(
time
../libc/include/time.yaml
diff --git a/libc/include/llvm-libc-macros/CMakeLists.txt b/libc/include/llvm-libc-macros/CMakeLists.txt
index af74d483956a3..3225b5b1849a1 100644
--- a/libc/include/llvm-libc-macros/CMakeLists.txt
+++ b/libc/include/llvm-libc-macros/CMakeLists.txt
@@ -400,6 +400,12 @@ add_macro_header(
poll-macros.h
)
+add_macro_header(
+ regex_macros
+ HDR
+ regex-macros.h
+)
+
add_macro_header(
EFIAPI_macros
HDR
diff --git a/libc/include/llvm-libc-macros/regex-macros.h b/libc/include/llvm-libc-macros/regex-macros.h
new file mode 100644
index 0000000000000..607b3966a7c83
--- /dev/null
+++ b/libc/include/llvm-libc-macros/regex-macros.h
@@ -0,0 +1,42 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Macros for POSIX regex.h.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_MACROS_REGEX_MACROS_H
+#define LLVM_LIBC_MACROS_REGEX_MACROS_H
+
+// regcomp cflags
+#define REG_EXTENDED 1
+#define REG_ICASE 2
+#define REG_NOSUB 4
+#define REG_NEWLINE 8
+
+// regexec eflags
+#define REG_NOTBOL 1
+#define REG_NOTEOL 2
+
+// Error codes
+#define REG_NOMATCH 1
+#define REG_BADPAT 2
+#define REG_ECOLLATE 3
+#define REG_ECTYPE 4
+#define REG_EESCAPE 5
+#define REG_ESUBREG 6
+#define REG_EBRACK 7
+#define REG_EPAREN 8
+#define REG_EBRACE 9
+#define REG_BADBR 10
+#define REG_ERANGE 11
+#define REG_ESPACE 12
+#define REG_BADRPT 13
+
+#endif // LLVM_LIBC_MACROS_REGEX_MACROS_H
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index 207834072ede9..d6a013d7f4c43 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -208,6 +208,9 @@ add_header(ACTION HDR ACTION.h)
add_header(ENTRY HDR ENTRY.h)
add_header(VISIT HDR VISIT.h)
add_header(struct_hsearch_data HDR struct_hsearch_data.h)
+add_header(regex_t HDR regex_t.h DEPENDS .size_t)
+add_header(regoff_t HDR regoff_t.h)
+add_header(regmatch_t HDR regmatch_t.h DEPENDS .regoff_t)
add_header(struct_epoll_event HDR struct_epoll_event.h)
add_header(struct_epoll_data HDR struct_epoll_data.h)
add_header(
diff --git a/libc/include/llvm-libc-types/regex_t.h b/libc/include/llvm-libc-types/regex_t.h
new file mode 100644
index 0000000000000..1ca0f63908306
--- /dev/null
+++ b/libc/include/llvm-libc-types/regex_t.h
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_REGEX_T_H
+#define LLVM_LIBC_TYPES_REGEX_T_H
+
+#include "size_t.h"
+
+typedef struct {
+ size_t re_nsub; // Subexpression count; the stub regcomp always stores 0.
+ void *__internal; // Opaque state: set by regcomp, released by regfree.
+} regex_t;
+
+#endif // LLVM_LIBC_TYPES_REGEX_T_H
diff --git a/libc/include/llvm-libc-types/regmatch_t.h b/libc/include/llvm-libc-types/regmatch_t.h
new file mode 100644
index 0000000000000..5c178380e2569
--- /dev/null
+++ b/libc/include/llvm-libc-types/regmatch_t.h
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_REGMATCH_T_H
+#define LLVM_LIBC_TYPES_REGMATCH_T_H
+
+#include "regoff_t.h"
+
+typedef struct {
+ regoff_t rm_so;
+ regoff_t rm_eo;
+} regmatch_t;
+
+#endif // LLVM_LIBC_TYPES_REGMATCH_T_H
diff --git a/libc/include/llvm-libc-types/regoff_t.h b/libc/include/llvm-libc-types/regoff_t.h
new file mode 100644
index 0000000000000..3caadf7a0bdd1
--- /dev/null
+++ b/libc/include/llvm-libc-types/regoff_t.h
@@ -0,0 +1,14 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_REGOFF_T_H
+#define LLVM_LIBC_TYPES_REGOFF_T_H
+
+typedef __PTRDIFF_TYPE__ regoff_t; // POSIX: must hold any ptrdiff_t/ssize_t.
+
+#endif // LLVM_LIBC_TYPES_REGOFF_T_H
diff --git a/libc/include/regex.yaml b/libc/include/regex.yaml
new file mode 100644
index 0000000000000..8fe77257f07be
--- /dev/null
+++ b/libc/include/regex.yaml
@@ -0,0 +1,83 @@
+header: regex.h
+standards:
+ - posix
+macros:
+ - macro_name: REG_EXTENDED
+ macro_header: regex-macros.h
+ - macro_name: REG_ICASE
+ macro_header: regex-macros.h
+ - macro_name: REG_NOSUB
+ macro_header: regex-macros.h
+ - macro_name: REG_NEWLINE
+ macro_header: regex-macros.h
+ - macro_name: REG_NOTBOL
+ macro_header: regex-macros.h
+ - macro_name: REG_NOTEOL
+ macro_header: regex-macros.h
+ - macro_name: REG_NOMATCH
+ macro_header: regex-macros.h
+ - macro_name: REG_BADPAT
+ macro_header: regex-macros.h
+ - macro_name: REG_ECOLLATE
+ macro_header: regex-macros.h
+ - macro_name: REG_ECTYPE
+ macro_header: regex-macros.h
+ - macro_name: REG_EESCAPE
+ macro_header: regex-macros.h
+ - macro_name: REG_ESUBREG
+ macro_header: regex-macros.h
+ - macro_name: REG_EBRACK
+ macro_header: regex-macros.h
+ - macro_name: REG_EPAREN
+ macro_header: regex-macros.h
+ - macro_name: REG_EBRACE
+ macro_header: regex-macros.h
+ - macro_name: REG_BADBR
+ macro_header: regex-macros.h
+ - macro_name: REG_ERANGE
+ macro_header: regex-macros.h
+ - macro_name: REG_ESPACE
+ macro_header: regex-macros.h
+ - macro_name: REG_BADRPT
+ macro_header: regex-macros.h
+types:
+ - type_name: regex_t
+ - type_name: regmatch_t
+ - type_name: regoff_t
+ - type_name: size_t
+enums: []
+objects: []
+functions:
+ - name: regcomp
+ standards:
+ - posix
+ return_type: int
+ arguments:
+ - type: regex_t *__restrict
+ - type: const char *__restrict
+ - type: int
+ - name: regexec
+ standards:
+ - posix
+ return_type: int
+ arguments:
+ - type: const regex_t *__restrict
+ - type: const char *__restrict
+ - type: size_t
+ - type: regmatch_t *__restrict
+ - type: int
+ - name: regerror
+ standards:
+ - posix
+ return_type: size_t
+ arguments:
+ - type: int
+ - type: const regex_t *__restrict
+ - type: char *__restrict
+ - type: size_t
+ - name: regfree
+ standards:
+ - posix
+ return_type: void
+ arguments:
+ - type: regex_t *
diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt
index 4ca42ddc4f870..891fc122836cb 100644
--- a/libc/src/CMakeLists.txt
+++ b/libc/src/CMakeLists.txt
@@ -44,6 +44,7 @@ add_subdirectory(compiler)
add_subdirectory(locale)
add_subdirectory(nl_types)
add_subdirectory(search)
+add_subdirectory(regex)
add_subdirectory(setjmp)
add_subdirectory(signal)
add_subdirectory(spawn)
diff --git a/libc/src/regex/CMakeLists.txt b/libc/src/regex/CMakeLists.txt
new file mode 100644
index 0000000000000..4b3f8804c2a72
--- /dev/null
+++ b/libc/src/regex/CMakeLists.txt
@@ -0,0 +1,56 @@
+add_entrypoint_object(
+ regcomp
+ SRCS
+ regcomp.cpp
+ HDRS
+ regcomp.h
+ DEPENDS
+ libc.hdr.types.regex_t
+ libc.hdr.regex_macros
+ libc.src.__support.common
+ libc.src.__support.CPP.new
+ libc.src.string.memory_utils.inline_memcpy
+ libc.src.string.string_utils
+)
+
+add_entrypoint_object(
+ regexec
+ SRCS
+ regexec.cpp
+ HDRS
+ regexec.h
+ DEPENDS
+ libc.hdr.types.regex_t
+ libc.hdr.types.regmatch_t
+ libc.hdr.types.size_t
+ libc.hdr.regex_macros
+ libc.src.__support.common
+ libc.src.string.memory_utils.inline_strstr
+)
+
+add_entrypoint_object(
+ regerror
+ SRCS
+ regerror.cpp
+ HDRS
+ regerror.h
+ DEPENDS
+ libc.hdr.types.regex_t
+ libc.hdr.types.size_t
+ libc.hdr.regex_macros
+ libc.src.__support.common
+ libc.src.string.memory_utils.inline_memcpy
+ libc.src.string.string_utils
+)
+
+add_entrypoint_object(
+ regfree
+ SRCS
+ regfree.cpp
+ HDRS
+ regfree.h
+ DEPENDS
+ libc.hdr.types.regex_t
+ libc.src.__support.common
+ libc.src.__support.CPP.new
+)
diff --git a/libc/src/regex/regcomp.cpp b/libc/src/regex/regcomp.cpp
new file mode 100644
index 0000000000000..49413d13e16b6
--- /dev/null
+++ b/libc/src/regex/regcomp.cpp
@@ -0,0 +1,51 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation of regcomp (stub).
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/regex/regcomp.h"
+
+#include "hdr/regex_macros.h"
+#include "src/__support/CPP/new.h"
+#include "src/__support/alloc-checker.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/string/memory_utils/inline_memcpy.h"
+#include "src/string/string_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, regcomp,
+ (regex_t *__restrict preg, const char *__restrict pattern,
+ int cflags)) {
+ // Stub: stores a heap copy of pattern in preg; cflags is ignored for now.
+ (void)cflags;
+
+ // Note: POSIX requires callers to call regfree() before reusing a preg
+ // object. We therefore do not attempt to free any previous __internal here
+ // — preg is uninitialized on first use and the pointer would be garbage.
+
+ size_t len = internal::string_length(pattern);
+ AllocChecker ac;
+ char *copy = new (ac) char[len + 1]; // +1 for the NUL terminator.
+ if (!ac)
+ return REG_ESPACE; // Allocation failed; preg has not been written to.
+
+ inline_memcpy(copy, pattern, len + 1); // Copies the terminating NUL too.
+
+ // TODO: This is a stub. re_nsub is always 0 because parenthesised
+ // subexpressions are not yet parsed. REG_NOSUB is effectively always active.
+ preg->re_nsub = 0;
+ preg->__internal = copy; // Held by preg until regfree() deletes it.
+ return 0;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/regex/regcomp.h b/libc/src/regex/regcomp.h
new file mode 100644
index 0000000000000..e42d73b8e4178
--- /dev/null
+++ b/libc/src/regex/regcomp.h
@@ -0,0 +1,27 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation header for regcomp.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_REGEX_REGCOMP_H
+#define LLVM_LIBC_SRC_REGEX_REGCOMP_H
+
+#include "hdr/types/regex_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int regcomp(regex_t *__restrict preg, const char *__restrict pattern,
+ int cflags);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_REGEX_REGCOMP_H
diff --git a/libc/src/regex/regerror.cpp b/libc/src/regex/regerror.cpp
new file mode 100644
index 0000000000000..8efd72376f494
--- /dev/null
+++ b/libc/src/regex/regerror.cpp
@@ -0,0 +1,90 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation of regerror.
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/regex/regerror.h"
+
+#include "hdr/regex_macros.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/string/memory_utils/inline_memcpy.h"
+#include "src/string/string_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(size_t, regerror,
+ (int errcode, const regex_t *__restrict preg,
+ char *__restrict errbuf, size_t errbuf_size)) {
+ (void)preg; // Unused for now: the message depends only on errcode.
+
+ const char *msg;
+ switch (errcode) { // Map the error code to a fixed English message.
+ case 0:
+ msg = "Success";
+ break;
+ case REG_NOMATCH:
+ msg = "No match";
+ break;
+ case REG_BADPAT:
+ msg = "Invalid regular expression";
+ break;
+ case REG_ECOLLATE:
+ msg = "Invalid collating element";
+ break;
+ case REG_ECTYPE:
+ msg = "Invalid character class";
+ break;
+ case REG_EESCAPE:
+ msg = "Trailing backslash";
+ break;
+ case REG_ESUBREG:
+ msg = "Invalid backreference";
+ break;
+ case REG_EBRACK:
+ msg = "Missing ']'";
+ break;
+ case REG_EPAREN:
+ msg = "Missing ')'";
+ break;
+ case REG_EBRACE:
+ msg = "Missing '}'";
+ break;
+ case REG_BADBR:
+ msg = "Invalid repetition count";
+ break;
+ case REG_ERANGE:
+ msg = "Invalid range end";
+ break;
+ case REG_ESPACE:
+ msg = "Out of memory";
+ break;
+ case REG_BADRPT:
+ msg = "Invalid preceding expression";
+ break;
+ default: // Any errcode without a case above.
+ msg = "Unknown error";
+ break;
+ }
+
+ size_t msg_len = internal::string_length(msg) + 1; // Size including NUL.
+
+ if (errbuf_size > 0 && errbuf) { // Skip the copy when there is no buffer.
+ size_t copy_len = msg_len < errbuf_size ? msg_len : errbuf_size;
+ inline_memcpy(errbuf, msg, copy_len - 1); // Leave one byte for the NUL.
+ errbuf[copy_len - 1] = '\0'; // Terminated even when msg was truncated.
+ }
+ // POSIX requires returning the size needed to hold the full NUL-terminated
+ // string, even if it was truncated in the buffer.
+ return msg_len;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/regex/regerror.h b/libc/src/regex/regerror.h
new file mode 100644
index 0000000000000..52ac54ad9c94c
--- /dev/null
+++ b/libc/src/regex/regerror.h
@@ -0,0 +1,29 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation header for regerror.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_REGEX_REGERROR_H
+#define LLVM_LIBC_SRC_REGEX_REGERROR_H
+
+#include "hdr/types/regex_t.h"
+#include "src/__support/macros/config.h"
+
+#include "hdr/types/size_t.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t regerror(int errcode, const regex_t *__restrict preg,
+ char *__restrict errbuf, size_t errbuf_size);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_REGEX_REGERROR_H
diff --git a/libc/src/regex/regexec.cpp b/libc/src/regex/regexec.cpp
new file mode 100644
index 0000000000000..7d3927a2db8ee
--- /dev/null
+++ b/libc/src/regex/regexec.cpp
@@ -0,0 +1,54 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation of regexec (stub).
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/regex/regexec.h"
+
+#include "hdr/regex_macros.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/string/memory_utils/inline_strstr.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(int, regexec,
+ (const regex_t *__restrict preg,
+ const char *__restrict string, size_t nmatch,
+ regmatch_t *__restrict pmatch, int eflags)) {
+ // TODO: This is a stub that only performs a literal substring search
+ // (strstr) for the stored pattern. The following are not yet implemented:
+ // - Regex metacharacters (., *, +, ?, [], {}, (), |, ^, $).
+ // - REG_EXTENDED / REG_ICASE / REG_NEWLINE compile flags.
+ // - REG_NOTBOL / REG_NOTEOL eflags.
+ // - pmatch[] filling (subexpression offsets).
+ (void)nmatch;
+ (void)pmatch;
+ (void)eflags;
+
+ // __internal is read as the pattern string; null (e.g. after regfree)
+ // is reported as "no match" instead of being dereferenced.
+ const char *pattern = static_cast<const char *>(preg->__internal);
+ if (!pattern)
+ return REG_NOMATCH;
+
+ // An empty pattern always matches.
+ if (*pattern == '\0')
+ return 0;
+
+ // Literal substring search; comp yields 0 when two characters are equal.
+ auto comp = [](char l, char r) -> int { return l - r; };
+ if (inline_strstr(string, pattern, comp))
+ return 0;
+
+ return REG_NOMATCH;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/regex/regexec.h b/libc/src/regex/regexec.h
new file mode 100644
index 0000000000000..193a1c45c0555
--- /dev/null
+++ b/libc/src/regex/regexec.h
@@ -0,0 +1,29 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation header for regexec.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_REGEX_REGEXEC_H
+#define LLVM_LIBC_SRC_REGEX_REGEXEC_H
+
+#include "hdr/types/regex_t.h"
+#include "hdr/types/regmatch_t.h"
+#include "hdr/types/size_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int regexec(const regex_t *__restrict preg, const char *__restrict string,
+ size_t nmatch, regmatch_t *__restrict pmatch, int eflags);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_REGEX_REGEXEC_H
diff --git a/libc/src/regex/regfree.cpp b/libc/src/regex/regfree.cpp
new file mode 100644
index 0000000000000..06fd791f628ff
--- /dev/null
+++ b/libc/src/regex/regfree.cpp
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation of regfree.
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/regex/regfree.h"
+
+#include "src/__support/CPP/new.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(void, regfree, (regex_t * preg)) {
+ if (preg->__internal) { // Nothing to release when already null.
+ char *ptr = static_cast<char *>(preg->__internal);
+ delete[] ptr; // Pairs with the new char[] in regcomp.
+ preg->__internal = nullptr; // A second regfree then skips the delete.
+ }
+ preg->re_nsub = 0;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/regex/regfree.h b/libc/src/regex/regfree.h
new file mode 100644
index 0000000000000..36599387f494d
--- /dev/null
+++ b/libc/src/regex/regfree.h
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Implementation header for regfree.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_REGEX_REGFREE_H
+#define LLVM_LIBC_SRC_REGEX_REGFREE_H
+
+#include "hdr/types/regex_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+void regfree(regex_t *preg);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_REGEX_REGFREE_H
diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt
index b877c7455fc34..6b11d958ded00 100644
--- a/libc/test/src/CMakeLists.txt
+++ b/libc/test/src/CMakeLists.txt
@@ -104,6 +104,7 @@ add_subdirectory(dirent)
add_subdirectory(locale)
add_subdirectory(nl_types)
add_subdirectory(signal)
+add_subdirectory(regex)
add_subdirectory(spawn)
if(${LIBC_TARGET_OS} STREQUAL "linux")
diff --git a/libc/test/src/regex/CMakeLists.txt b/libc/test/src/regex/CMakeLists.txt
new file mode 100644
index 0000000000000..4807fcc85257e
--- /dev/null
+++ b/libc/test/src/regex/CMakeLists.txt
@@ -0,0 +1,25 @@
+if(LLVM_LIBC_FULL_BUILD)
+ add_custom_target(libc_regex_unittests)
+
+ add_libc_unittest(
+ regerror_test
+ SUITE
+ libc_regex_unittests
+ SRCS
+ regerror_test.cpp
+ DEPENDS
+ libc.src.regex.regerror
+ )
+
+ add_libc_unittest(
+ regex_basic_test
+ SUITE
+ libc_regex_unittests
+ SRCS
+ regex_basic_test.cpp
+ DEPENDS
+ libc.src.regex.regcomp
+ libc.src.regex.regexec
+ libc.src.regex.regfree
+ )
+endif()
diff --git a/libc/test/src/regex/regerror_test.cpp b/libc/test/src/regex/regerror_test.cpp
new file mode 100644
index 0000000000000..28afc87b843ae
--- /dev/null
+++ b/libc/test/src/regex/regerror_test.cpp
@@ -0,0 +1,48 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Unit tests for regerror.
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/regex/regerror.h"
+#include "test/UnitTest/Test.h"
+
+#include "include/llvm-libc-macros/regex-macros.h"
+
+TEST(LlvmLibcRegexTest, RegerrorAllCodes) {
+ char buf[128];
+
+ ASSERT_GT(LIBC_NAMESPACE::regerror(REG_NOMATCH, nullptr, buf, sizeof(buf)),
+ size_t(0));
+ ASSERT_STREQ("No match", buf);
+
+ ASSERT_GT(LIBC_NAMESPACE::regerror(REG_ESPACE, nullptr, buf, sizeof(buf)),
+ size_t(0));
+ ASSERT_STREQ("Out of memory", buf);
+}
+
+TEST(LlvmLibcRegexTest, RegerrorTruncation) {
+ char buf[5];
+ size_t needed =
+ LIBC_NAMESPACE::regerror(REG_NOMATCH, nullptr, buf, sizeof(buf));
+ ASSERT_GT(needed, size_t(5)); // "No match" + NUL = 9 bytes
+ ASSERT_EQ(buf[4], '\0'); // properly NUL-terminated
+}
+
+TEST(LlvmLibcRegexTest, RegerrorZeroBuffer) {
+ size_t needed = LIBC_NAMESPACE::regerror(REG_NOMATCH, nullptr, nullptr, 0);
+ ASSERT_GT(needed, size_t(0));
+}
+
+TEST(LlvmLibcRegexTest, RegerrorSuccess) {
+ char buf[128];
+ LIBC_NAMESPACE::regerror(0, nullptr, buf, sizeof(buf));
+ ASSERT_STREQ("Success", buf);
+}
diff --git a/libc/test/src/regex/regex_basic_test.cpp b/libc/test/src/regex/regex_basic_test.cpp
new file mode 100644
index 0000000000000..287cc7f66686f
--- /dev/null
+++ b/libc/test/src/regex/regex_basic_test.cpp
@@ -0,0 +1,68 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Basic round-trip tests for POSIX regex functions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/regex/regcomp.h"
+#include "src/regex/regexec.h"
+#include "src/regex/regfree.h"
+#include "test/UnitTest/Test.h"
+
+#include "include/llvm-libc-macros/regex-macros.h"
+#include "include/llvm-libc-types/regex_t.h"
+
+TEST(LlvmLibcRegexTest, BasicLiteralRoundTrip) {
+ regex_t preg;
+ ASSERT_EQ(0,
+ LIBC_NAMESPACE::regcomp(&preg, "hello", REG_EXTENDED | REG_NOSUB));
+ ASSERT_EQ(0,
+ LIBC_NAMESPACE::regexec(&preg, "say hello world", 0, nullptr, 0));
+ ASSERT_EQ(REG_NOMATCH,
+ LIBC_NAMESPACE::regexec(&preg, "goodbye", 0, nullptr, 0));
+ LIBC_NAMESPACE::regfree(&preg);
+}
+
+TEST(LlvmLibcRegexTest, MismatchCases) {
+ regex_t preg;
+ // Partial match
+ ASSERT_EQ(0,
+ LIBC_NAMESPACE::regcomp(&preg, "hello", REG_EXTENDED | REG_NOSUB));
+ ASSERT_EQ(REG_NOMATCH, LIBC_NAMESPACE::regexec(&preg, "hell", 0, nullptr, 0));
+ LIBC_NAMESPACE::regfree(&preg);
+
+ // Case sensitivity
+ ASSERT_EQ(0,
+ LIBC_NAMESPACE::regcomp(&preg, "Hello", REG_EXTENDED | REG_NOSUB));
+ ASSERT_EQ(REG_NOMATCH,
+ LIBC_NAMESPACE::regexec(&preg, "hello", 0, nullptr, 0));
+ LIBC_NAMESPACE::regfree(&preg);
+
+ // Empty string vs non-empty pattern
+ ASSERT_EQ(0, LIBC_NAMESPACE::regcomp(&preg, "a", REG_EXTENDED | REG_NOSUB));
+ ASSERT_EQ(REG_NOMATCH, LIBC_NAMESPACE::regexec(&preg, "", 0, nullptr, 0));
+ LIBC_NAMESPACE::regfree(&preg);
+}
+
+TEST(LlvmLibcRegexTest, EmptyString) {
+ regex_t preg;
+ ASSERT_EQ(0, LIBC_NAMESPACE::regcomp(&preg, "", REG_EXTENDED | REG_NOSUB));
+ ASSERT_EQ(0, LIBC_NAMESPACE::regexec(&preg, "anything", 0, nullptr, 0));
+ ASSERT_EQ(0, LIBC_NAMESPACE::regexec(&preg, "", 0, nullptr, 0));
+ LIBC_NAMESPACE::regfree(&preg);
+}
+
+TEST(LlvmLibcRegexTest, ExactMatch) {
+ regex_t preg;
+ ASSERT_EQ(0,
+ LIBC_NAMESPACE::regcomp(&preg, "test", REG_EXTENDED | REG_NOSUB));
+ ASSERT_EQ(0, LIBC_NAMESPACE::regexec(&preg, "test", 0, nullptr, 0));
+ LIBC_NAMESPACE::regfree(&preg);
+}
>From ed80ffb11de6905f1f028111b359dea53f747196 Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Thu, 14 May 2026 17:17:12 +0100
Subject: [PATCH 2/4] [libc] Address reviewer feedback for regex implementation
(#196995)
Gated regex entrypoints and headers behind LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS
for Linux and Baremetal on all supported architectures.
Updated regoff_t to use __PTRDIFF_TYPE__ for POSIX compliance and modernised
regcomp and regerror to use cpp::string_view.
Additional changes:
* Fixed circular dependency between regex_macros and regex.h.
* Hardened regerror_test.cpp with proxy headers and precise truncation assertions.
* Updated CMake dependencies and added missing alloc_checker header library to __support.
Assisted-by: Automated tooling, human reviewed.
---
libc/config/baremetal/aarch64/entrypoints.txt | 10 ++++++++++
libc/config/baremetal/aarch64/headers.txt | 4 ++++
libc/config/baremetal/arm/entrypoints.txt | 10 ++++++++++
libc/config/baremetal/arm/headers.txt | 4 ++++
libc/config/baremetal/riscv/entrypoints.txt | 10 ++++++++++
libc/config/baremetal/riscv/headers.txt | 4 ++++
libc/config/linux/aarch64/entrypoints.txt | 14 +++++++++-----
libc/config/linux/aarch64/headers.txt | 2 +-
libc/config/linux/arm/entrypoints.txt | 17 ++++++++++++-----
libc/config/linux/arm/headers.txt | 2 +-
libc/config/linux/riscv/entrypoints.txt | 14 +++++++++-----
libc/config/linux/riscv/headers.txt | 2 +-
libc/config/linux/x86_64/entrypoints.txt | 14 +++++++++-----
libc/config/linux/x86_64/headers.txt | 2 +-
libc/hdr/CMakeLists.txt | 1 -
libc/include/llvm-libc-types/regoff_t.h | 7 ++++++-
libc/src/__support/CMakeLists.txt | 8 ++++++++
libc/src/regex/CMakeLists.txt | 5 +++--
libc/src/regex/regcomp.cpp | 6 +++---
libc/src/regex/regerror.cpp | 4 ++--
libc/test/src/regex/CMakeLists.txt | 5 ++++-
libc/test/src/regex/regerror_test.cpp | 7 +++----
libc/test/src/regex/regex_basic_test.cpp | 4 ++--
23 files changed, 116 insertions(+), 40 deletions(-)
diff --git a/libc/config/baremetal/aarch64/entrypoints.txt b/libc/config/baremetal/aarch64/entrypoints.txt
index 452abd985b3a5..dcb50135232e2 100644
--- a/libc/config/baremetal/aarch64/entrypoints.txt
+++ b/libc/config/baremetal/aarch64/entrypoints.txt
@@ -947,6 +947,16 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
)
endif()
+if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
+ list(APPEND TARGET_LIBC_ENTRYPOINTS
+ # regex.h entrypoints
+ libc.src.regex.regcomp
+ libc.src.regex.regexec
+ libc.src.regex.regerror
+ libc.src.regex.regfree
+ )
+endif()
+
set(TARGET_LLVMLIBC_ENTRYPOINTS
${TARGET_LIBC_ENTRYPOINTS}
${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/config/baremetal/aarch64/headers.txt b/libc/config/baremetal/aarch64/headers.txt
index 31cc04d849109..42dcd87b680d6 100644
--- a/libc/config/baremetal/aarch64/headers.txt
+++ b/libc/config/baremetal/aarch64/headers.txt
@@ -25,3 +25,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.uchar
libc.include.wchar
)
+
+if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
+ list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
+endif()
diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt
index 41c80efc64227..fac62bac939cc 100644
--- a/libc/config/baremetal/arm/entrypoints.txt
+++ b/libc/config/baremetal/arm/entrypoints.txt
@@ -966,6 +966,16 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
)
endif()
+if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
+ list(APPEND TARGET_LIBC_ENTRYPOINTS
+ # regex.h entrypoints
+ libc.src.regex.regcomp
+ libc.src.regex.regexec
+ libc.src.regex.regerror
+ libc.src.regex.regfree
+ )
+endif()
+
set(TARGET_LLVMLIBC_ENTRYPOINTS
${TARGET_LIBC_ENTRYPOINTS}
${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/config/baremetal/arm/headers.txt b/libc/config/baremetal/arm/headers.txt
index a259c3a4d834b..a69660a97fdae 100644
--- a/libc/config/baremetal/arm/headers.txt
+++ b/libc/config/baremetal/arm/headers.txt
@@ -26,3 +26,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.wchar
libc.include.wctype
)
+
+if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
+ list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
+endif()
diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt
index 88eacfae12969..a3b96225ff09d 100644
--- a/libc/config/baremetal/riscv/entrypoints.txt
+++ b/libc/config/baremetal/riscv/entrypoints.txt
@@ -963,6 +963,16 @@ if(LIBC_COMPILER_HAS_FIXED_POINT)
)
endif()
+if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
+ list(APPEND TARGET_LIBC_ENTRYPOINTS
+ # regex.h entrypoints
+ libc.src.regex.regcomp
+ libc.src.regex.regexec
+ libc.src.regex.regerror
+ libc.src.regex.regfree
+ )
+endif()
+
set(TARGET_LLVMLIBC_ENTRYPOINTS
${TARGET_LIBC_ENTRYPOINTS}
${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/config/baremetal/riscv/headers.txt b/libc/config/baremetal/riscv/headers.txt
index a259c3a4d834b..a69660a97fdae 100644
--- a/libc/config/baremetal/riscv/headers.txt
+++ b/libc/config/baremetal/riscv/headers.txt
@@ -26,3 +26,7 @@ set(TARGET_PUBLIC_HEADERS
libc.include.wchar
libc.include.wctype
)
+
+if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
+ list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
+endif()
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 20f029be07a40..6cb251ebae047 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -1193,11 +1193,6 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.search.twalk
libc.src.search.twalk_r
- # regex.h entrypoints
- libc.src.regex.regcomp
- libc.src.regex.regexec
- libc.src.regex.regerror
- libc.src.regex.regfree
# threads.h entrypoints
libc.src.threads.call_once
@@ -1277,6 +1272,15 @@ if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
list(APPEND TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.sysconf
)
+ if(LLVM_LIBC_FULL_BUILD)
+ list(APPEND TARGET_LIBC_ENTRYPOINTS
+ # regex.h entrypoints
+ libc.src.regex.regcomp
+ libc.src.regex.regexec
+ libc.src.regex.regerror
+ libc.src.regex.regfree
+ )
+ endif()
endif()
set(TARGET_LIBMVEC_ENTRYPOINTS)
diff --git a/libc/config/linux/aarch64/headers.txt b/libc/config/linux/aarch64/headers.txt
index 941a634c48c34..e321c5425f662 100644
--- a/libc/config/linux/aarch64/headers.txt
+++ b/libc/config/linux/aarch64/headers.txt
@@ -65,6 +65,6 @@ set(TARGET_PUBLIC_HEADERS
libc.include.wctype
)
-if(LLVM_LIBC_FULL_BUILD)
+if(LLVM_LIBC_FULL_BUILD AND LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
endif()
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 8a4730ed04138..d34609e91bc72 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -219,11 +219,6 @@ if(LLVM_LIBC_FULL_BUILD)
# search.h entrypoints
libc.src.search.lfind
- # regex.h entrypoints
- libc.src.regex.regcomp
- libc.src.regex.regexec
- libc.src.regex.regerror
- libc.src.regex.regfree
# setjmp.h entrypoints
libc.src.setjmp.longjmp
@@ -563,6 +558,18 @@ list(APPEND TARGET_LIBM_ENTRYPOINTS
libc.src.math.ufromfpxbf16
)
+if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
+ if(LLVM_LIBC_FULL_BUILD)
+ list(APPEND TARGET_LIBC_ENTRYPOINTS
+ # regex.h entrypoints
+ libc.src.regex.regcomp
+ libc.src.regex.regexec
+ libc.src.regex.regerror
+ libc.src.regex.regfree
+ )
+ endif()
+endif()
+
set(TARGET_LLVMLIBC_ENTRYPOINTS
${TARGET_LIBC_ENTRYPOINTS}
${TARGET_LIBM_ENTRYPOINTS}
diff --git a/libc/config/linux/arm/headers.txt b/libc/config/linux/arm/headers.txt
index 4b7bc079dcc0b..6a0b285944698 100644
--- a/libc/config/linux/arm/headers.txt
+++ b/libc/config/linux/arm/headers.txt
@@ -26,6 +26,6 @@ set(TARGET_PUBLIC_HEADERS
# libc.include.sys_epoll
)
-if(LLVM_LIBC_FULL_BUILD)
+if(LLVM_LIBC_FULL_BUILD AND LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
endif()
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index bf410cccac770..fe9d94372da4f 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -1316,11 +1316,6 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.search.lsearch
libc.src.search.remque
- # regex.h entrypoints
- libc.src.regex.regcomp
- libc.src.regex.regexec
- libc.src.regex.regerror
- libc.src.regex.regfree
# threads.h entrypoints
libc.src.threads.call_once
@@ -1411,6 +1406,15 @@ if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
list(APPEND TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.sysconf
)
+ if(LLVM_LIBC_FULL_BUILD)
+ list(APPEND TARGET_LIBC_ENTRYPOINTS
+ # regex.h entrypoints
+ libc.src.regex.regcomp
+ libc.src.regex.regexec
+ libc.src.regex.regerror
+ libc.src.regex.regfree
+ )
+ endif()
endif()
set(TARGET_LLVMLIBC_ENTRYPOINTS
diff --git a/libc/config/linux/riscv/headers.txt b/libc/config/linux/riscv/headers.txt
index 1c526807b17cd..d7ae420240280 100644
--- a/libc/config/linux/riscv/headers.txt
+++ b/libc/config/linux/riscv/headers.txt
@@ -64,6 +64,6 @@ set(TARGET_PUBLIC_HEADERS
libc.include.wctype
)
-if(LLVM_LIBC_FULL_BUILD)
+if(LLVM_LIBC_FULL_BUILD AND LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
endif()
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 9d35e52bc9dc3..a7d37882ed4db 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1390,11 +1390,6 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.search.twalk
libc.src.search.twalk_r
- # regex.h entrypoints
- libc.src.regex.regcomp
- libc.src.regex.regexec
- libc.src.regex.regerror
- libc.src.regex.regfree
# threads.h entrypoints
libc.src.threads.call_once
@@ -1501,6 +1496,15 @@ if(LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
list(APPEND TARGET_LIBC_ENTRYPOINTS
libc.src.unistd.sysconf
)
+ if(LLVM_LIBC_FULL_BUILD)
+ list(APPEND TARGET_LIBC_ENTRYPOINTS
+ # regex.h entrypoints
+ libc.src.regex.regcomp
+ libc.src.regex.regexec
+ libc.src.regex.regerror
+ libc.src.regex.regfree
+ )
+ endif()
endif()
set(TARGET_LIBMVEC_ENTRYPOINTS)
diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt
index 7ccdc1c3c4668..1aa63784e8aab 100644
--- a/libc/config/linux/x86_64/headers.txt
+++ b/libc/config/linux/x86_64/headers.txt
@@ -70,6 +70,6 @@ set(TARGET_PUBLIC_HEADERS
libc.include.wctype
)
-if(LLVM_LIBC_FULL_BUILD)
+if(LLVM_LIBC_FULL_BUILD AND LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
list(APPEND TARGET_PUBLIC_HEADERS libc.include.regex)
endif()
diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt
index ca58987529699..f0a79285dfa5c 100644
--- a/libc/hdr/CMakeLists.txt
+++ b/libc/hdr/CMakeLists.txt
@@ -373,7 +373,6 @@ add_proxy_header_library(
regex_macros.h
FULL_BUILD_DEPENDS
libc.include.llvm-libc-macros.regex_macros
- libc.include.regex
)
add_subdirectory(types)
diff --git a/libc/include/llvm-libc-types/regoff_t.h b/libc/include/llvm-libc-types/regoff_t.h
index 3caadf7a0bdd1..d16559a56030d 100644
--- a/libc/include/llvm-libc-types/regoff_t.h
+++ b/libc/include/llvm-libc-types/regoff_t.h
@@ -5,10 +5,15 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Definition of the regoff_t type.
+///
+//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_TYPES_REGOFF_T_H
#define LLVM_LIBC_TYPES_REGOFF_T_H
-typedef int regoff_t;
+typedef __PTRDIFF_TYPE__ regoff_t;
#endif // LLVM_LIBC_TYPES_REGOFF_T_H
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 098fb6ef86936..ada489046ef9e 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -144,6 +144,14 @@ add_header_library(
libc.src.__support.CPP.string_view
)
+add_header_library(
+ alloc_checker
+ HDRS
+ alloc-checker.h
+ DEPENDS
+ libc.src.__support.macros.config
+)
+
add_header_library(
ctype_utils
HDRS
diff --git a/libc/src/regex/CMakeLists.txt b/libc/src/regex/CMakeLists.txt
index 4b3f8804c2a72..3db701645b26a 100644
--- a/libc/src/regex/CMakeLists.txt
+++ b/libc/src/regex/CMakeLists.txt
@@ -9,8 +9,9 @@ add_entrypoint_object(
libc.hdr.regex_macros
libc.src.__support.common
libc.src.__support.CPP.new
+ libc.src.__support.CPP.string_view
+ libc.src.__support.alloc_checker
libc.src.string.memory_utils.inline_memcpy
- libc.src.string.string_utils
)
add_entrypoint_object(
@@ -39,8 +40,8 @@ add_entrypoint_object(
libc.hdr.types.size_t
libc.hdr.regex_macros
libc.src.__support.common
+ libc.src.__support.CPP.string_view
libc.src.string.memory_utils.inline_memcpy
- libc.src.string.string_utils
)
add_entrypoint_object(
diff --git a/libc/src/regex/regcomp.cpp b/libc/src/regex/regcomp.cpp
index 49413d13e16b6..f4c339f70f9d4 100644
--- a/libc/src/regex/regcomp.cpp
+++ b/libc/src/regex/regcomp.cpp
@@ -15,11 +15,10 @@
#include "hdr/regex_macros.h"
#include "src/__support/CPP/new.h"
+#include "src/__support/CPP/string_view.h"
#include "src/__support/alloc-checker.h"
-#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/string/memory_utils/inline_memcpy.h"
-#include "src/string/string_utils.h"
namespace LIBC_NAMESPACE_DECL {
@@ -33,7 +32,8 @@ LLVM_LIBC_FUNCTION(int, regcomp,
// object. We therefore do not attempt to free any previous __internal here
// — preg is uninitialized on first use and the pointer would be garbage.
- size_t len = internal::string_length(pattern);
+ cpp::string_view pattern_view(pattern);
+ size_t len = pattern_view.size();
AllocChecker ac;
char *copy = new (ac) char[len + 1];
if (!ac)
diff --git a/libc/src/regex/regerror.cpp b/libc/src/regex/regerror.cpp
index 8efd72376f494..87cd5211f9b6b 100644
--- a/libc/src/regex/regerror.cpp
+++ b/libc/src/regex/regerror.cpp
@@ -14,10 +14,10 @@
#include "src/regex/regerror.h"
#include "hdr/regex_macros.h"
+#include "src/__support/CPP/string_view.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/string/memory_utils/inline_memcpy.h"
-#include "src/string/string_utils.h"
namespace LIBC_NAMESPACE_DECL {
@@ -75,7 +75,7 @@ LLVM_LIBC_FUNCTION(size_t, regerror,
break;
}
- size_t msg_len = internal::string_length(msg) + 1; // include NUL
+ size_t msg_len = cpp::string_view(msg).size() + 1; // include NUL
if (errbuf_size > 0 && errbuf) {
size_t copy_len = msg_len < errbuf_size ? msg_len : errbuf_size;
diff --git a/libc/test/src/regex/CMakeLists.txt b/libc/test/src/regex/CMakeLists.txt
index 4807fcc85257e..915b8a5d42638 100644
--- a/libc/test/src/regex/CMakeLists.txt
+++ b/libc/test/src/regex/CMakeLists.txt
@@ -1,4 +1,4 @@
-if(LLVM_LIBC_FULL_BUILD)
+if(LLVM_LIBC_FULL_BUILD AND LLVM_LIBC_ENABLE_EXPERIMENTAL_ENTRYPOINTS)
add_custom_target(libc_regex_unittests)
add_libc_unittest(
@@ -8,6 +8,7 @@ if(LLVM_LIBC_FULL_BUILD)
SRCS
regerror_test.cpp
DEPENDS
+ libc.hdr.regex_macros
libc.src.regex.regerror
)
@@ -18,6 +19,8 @@ if(LLVM_LIBC_FULL_BUILD)
SRCS
regex_basic_test.cpp
DEPENDS
+ libc.hdr.regex_macros
+ libc.hdr.types.regex_t
libc.src.regex.regcomp
libc.src.regex.regexec
libc.src.regex.regfree
diff --git a/libc/test/src/regex/regerror_test.cpp b/libc/test/src/regex/regerror_test.cpp
index 28afc87b843ae..99bd2f71f1301 100644
--- a/libc/test/src/regex/regerror_test.cpp
+++ b/libc/test/src/regex/regerror_test.cpp
@@ -14,9 +14,9 @@
#include "src/regex/regerror.h"
#include "test/UnitTest/Test.h"
-#include "include/llvm-libc-macros/regex-macros.h"
+#include "hdr/regex_macros.h"
-TEST(LlvmLibcRegexTest, RegerrorAllCodes) {
+TEST(LlvmLibcRegexTest, RegerrorBasicCodes) {
char buf[128];
ASSERT_GT(LIBC_NAMESPACE::regerror(REG_NOMATCH, nullptr, buf, sizeof(buf)),
@@ -32,8 +32,7 @@ TEST(LlvmLibcRegexTest, RegerrorTruncation) {
char buf[5];
size_t needed =
LIBC_NAMESPACE::regerror(REG_NOMATCH, nullptr, buf, sizeof(buf));
- ASSERT_GT(needed, size_t(5)); // "No match" + NUL = 9 bytes
- ASSERT_EQ(buf[4], '\0'); // properly NUL-terminated
+ ASSERT_STREQ("No m", buf); // "No match" truncated to 5 bytes (including NUL)
}
TEST(LlvmLibcRegexTest, RegerrorZeroBuffer) {
diff --git a/libc/test/src/regex/regex_basic_test.cpp b/libc/test/src/regex/regex_basic_test.cpp
index 287cc7f66686f..ae5ad42a138e5 100644
--- a/libc/test/src/regex/regex_basic_test.cpp
+++ b/libc/test/src/regex/regex_basic_test.cpp
@@ -16,8 +16,8 @@
#include "src/regex/regfree.h"
#include "test/UnitTest/Test.h"
-#include "include/llvm-libc-macros/regex-macros.h"
-#include "include/llvm-libc-types/regex_t.h"
+#include "hdr/regex_macros.h"
+#include "hdr/types/regex_t.h"
TEST(LlvmLibcRegexTest, BasicLiteralRoundTrip) {
regex_t preg;
>From 644b5b6796e0affbb4758c28da37a852c0e471b5 Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Thu, 14 May 2026 17:29:24 +0100
Subject: [PATCH 3/4] [libc][NFC] Remove extra blank lines in entrypoint files
(#196995)
Clean up extra blank lines in arm, riscv, and x86_64 entrypoints.txt.
Assisted-by: Automated tooling, human reviewed.
---
libc/config/linux/arm/entrypoints.txt | 1 -
libc/config/linux/riscv/entrypoints.txt | 1 -
libc/config/linux/x86_64/entrypoints.txt | 1 -
3 files changed, 3 deletions(-)
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index d34609e91bc72..49b30ef1830f3 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -219,7 +219,6 @@ if(LLVM_LIBC_FULL_BUILD)
# search.h entrypoints
libc.src.search.lfind
-
# setjmp.h entrypoints
libc.src.setjmp.longjmp
libc.src.setjmp.setjmp
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index fe9d94372da4f..9a4c6de8e092e 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -1316,7 +1316,6 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.search.lsearch
libc.src.search.remque
-
# threads.h entrypoints
libc.src.threads.call_once
libc.src.threads.cnd_broadcast
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index a7d37882ed4db..d1bb130a84fe1 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -1390,7 +1390,6 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.search.twalk
libc.src.search.twalk_r
-
# threads.h entrypoints
libc.src.threads.call_once
libc.src.threads.cnd_broadcast
>From 335a37d75f72dcda16afba5d16e261664a8f2904 Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jbailey at raspberryginger.com>
Date: Mon, 18 May 2026 14:55:29 +0100
Subject: [PATCH 4/4] [libc] Address reviewer feedback on regex PR #196995
Addressed feedback from vonosmas and michaelrj-google:
* regerror.cpp: replaced raw const char* + break-heavy switch with
an immediately-invoked lambda captured into cpp::string_view.
Uses msg.size() and msg.data() throughout.
* regex.yaml: removed size_t from the types list; hdrgen infers it
from function signatures automatically.
* hdr/types/regex_t.h: added #else #error branch to match the
locale_t pattern, since regex_t contains internal state that is
incompatible with any system header. Fixed #endif comment to use
the preprocessor macro name LIBC_FULL_BUILD, not the CMake
variable LLVM_LIBC_FULL_BUILD.
* hdr/types/regoff_t.h, hdr/types/regmatch_t.h,
hdr/types/regex_t.h, hdr/regex_macros.h,
include/llvm-libc-types/regex_t.h,
include/llvm-libc-types/regmatch_t.h: added missing \file
Doxygen blocks.
* config/linux/aarch64/entrypoints.txt: removed extra blank line
between search.h and threads.h sections.
* test/src/regex/regerror_test.cpp: asserted that needed equals 9
(strlen("No match") + 1) in the truncation test.
* test/src/regex/regex_basic_test.cpp: added NullByteStopsParsing
test verifying regexec stops at an embedded NUL in the subject.
Assisted-by: Automated tooling, human reviewed.
---
libc/config/linux/aarch64/entrypoints.txt | 1 -
libc/hdr/regex_macros.h | 5 ++
libc/hdr/types/regex_t.h | 11 +++
libc/hdr/types/regmatch_t.h | 5 ++
libc/hdr/types/regoff_t.h | 5 ++
libc/include/llvm-libc-types/regex_t.h | 5 ++
libc/include/llvm-libc-types/regmatch_t.h | 5 ++
libc/include/regex.yaml | 1 -
libc/src/regex/regerror.cpp | 86 ++++++++++-------------
libc/test/src/regex/regerror_test.cpp | 1 +
libc/test/src/regex/regex_basic_test.cpp | 9 +++
11 files changed, 82 insertions(+), 52 deletions(-)
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 6cb251ebae047..e1fd41779c898 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -1193,7 +1193,6 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.search.twalk
libc.src.search.twalk_r
-
# threads.h entrypoints
libc.src.threads.call_once
libc.src.threads.cnd_broadcast
diff --git a/libc/hdr/regex_macros.h b/libc/hdr/regex_macros.h
index 968cb68394e94..74b5d4be20ff5 100644
--- a/libc/hdr/regex_macros.h
+++ b/libc/hdr/regex_macros.h
@@ -5,6 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Proxy header for regex macros.
+///
+//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_HDR_REGEX_MACROS_H
#define LLVM_LIBC_HDR_REGEX_MACROS_H
diff --git a/libc/hdr/types/regex_t.h b/libc/hdr/types/regex_t.h
index f12e440b04668..fccdce8073677 100644
--- a/libc/hdr/types/regex_t.h
+++ b/libc/hdr/types/regex_t.h
@@ -5,12 +5,23 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Proxy header for regex_t.
+///
+//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_HDR_TYPES_REGEX_T_H
#define LLVM_LIBC_HDR_TYPES_REGEX_T_H
#ifdef LIBC_FULL_BUILD
+
#include "include/llvm-libc-types/regex_t.h"
+
+#else // overlay mode
+
+#error "type not available in overlay mode"
+
#endif // LIBC_FULL_BUILD
#endif // LLVM_LIBC_HDR_TYPES_REGEX_T_H
diff --git a/libc/hdr/types/regmatch_t.h b/libc/hdr/types/regmatch_t.h
index 7a271b067878f..b26129ac7d562 100644
--- a/libc/hdr/types/regmatch_t.h
+++ b/libc/hdr/types/regmatch_t.h
@@ -5,6 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Proxy header for regmatch_t.
+///
+//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_HDR_TYPES_REGMATCH_T_H
#define LLVM_LIBC_HDR_TYPES_REGMATCH_T_H
diff --git a/libc/hdr/types/regoff_t.h b/libc/hdr/types/regoff_t.h
index 77a7e6ef4ca09..dce512f86778f 100644
--- a/libc/hdr/types/regoff_t.h
+++ b/libc/hdr/types/regoff_t.h
@@ -5,6 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Proxy header for regoff_t.
+///
+//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_HDR_TYPES_REGOFF_T_H
#define LLVM_LIBC_HDR_TYPES_REGOFF_T_H
diff --git a/libc/include/llvm-libc-types/regex_t.h b/libc/include/llvm-libc-types/regex_t.h
index 1ca0f63908306..6f47c8150056f 100644
--- a/libc/include/llvm-libc-types/regex_t.h
+++ b/libc/include/llvm-libc-types/regex_t.h
@@ -5,6 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Type definition for regex_t.
+///
+//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_TYPES_REGEX_T_H
#define LLVM_LIBC_TYPES_REGEX_T_H
diff --git a/libc/include/llvm-libc-types/regmatch_t.h b/libc/include/llvm-libc-types/regmatch_t.h
index 5c178380e2569..0adaa24229b6f 100644
--- a/libc/include/llvm-libc-types/regmatch_t.h
+++ b/libc/include/llvm-libc-types/regmatch_t.h
@@ -5,6 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Type definition for regmatch_t.
+///
+//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_TYPES_REGMATCH_T_H
#define LLVM_LIBC_TYPES_REGMATCH_T_H
diff --git a/libc/include/regex.yaml b/libc/include/regex.yaml
index 8fe77257f07be..baa7e39c9dd10 100644
--- a/libc/include/regex.yaml
+++ b/libc/include/regex.yaml
@@ -44,7 +44,6 @@ types:
- type_name: regex_t
- type_name: regmatch_t
- type_name: regoff_t
- - type_name: size_t
enums: []
objects: []
functions:
diff --git a/libc/src/regex/regerror.cpp b/libc/src/regex/regerror.cpp
index 87cd5211f9b6b..cc42d476687f4 100644
--- a/libc/src/regex/regerror.cpp
+++ b/libc/src/regex/regerror.cpp
@@ -26,60 +26,46 @@ LLVM_LIBC_FUNCTION(size_t, regerror,
char *__restrict errbuf, size_t errbuf_size)) {
(void)preg; // preg is reserved for implementation-specific messages.
- const char *msg;
- switch (errcode) {
- case 0:
- msg = "Success";
- break;
- case REG_NOMATCH:
- msg = "No match";
- break;
- case REG_BADPAT:
- msg = "Invalid regular expression";
- break;
- case REG_ECOLLATE:
- msg = "Invalid collating element";
- break;
- case REG_ECTYPE:
- msg = "Invalid character class";
- break;
- case REG_EESCAPE:
- msg = "Trailing backslash";
- break;
- case REG_ESUBREG:
- msg = "Invalid backreference";
- break;
- case REG_EBRACK:
- msg = "Missing ']'";
- break;
- case REG_EPAREN:
- msg = "Missing ')'";
- break;
- case REG_EBRACE:
- msg = "Missing '}'";
- break;
- case REG_BADBR:
- msg = "Invalid repetition count";
- break;
- case REG_ERANGE:
- msg = "Invalid range end";
- break;
- case REG_ESPACE:
- msg = "Out of memory";
- break;
- case REG_BADRPT:
- msg = "Invalid preceding expression";
- break;
- default:
- msg = "Unknown error";
- break;
- }
+ cpp::string_view msg = [errcode]() -> const char * {
+ switch (errcode) {
+ case 0:
+ return "Success";
+ case REG_NOMATCH:
+ return "No match";
+ case REG_BADPAT:
+ return "Invalid regular expression";
+ case REG_ECOLLATE:
+ return "Invalid collating element";
+ case REG_ECTYPE:
+ return "Invalid character class";
+ case REG_EESCAPE:
+ return "Trailing backslash";
+ case REG_ESUBREG:
+ return "Invalid backreference";
+ case REG_EBRACK:
+ return "Missing ']'";
+ case REG_EPAREN:
+ return "Missing ')'";
+ case REG_EBRACE:
+ return "Missing '}'";
+ case REG_BADBR:
+ return "Invalid repetition count";
+ case REG_ERANGE:
+ return "Invalid range end";
+ case REG_ESPACE:
+ return "Out of memory";
+ case REG_BADRPT:
+ return "Invalid preceding expression";
+ default:
+ return "Unknown error";
+ }
+ }();
- size_t msg_len = cpp::string_view(msg).size() + 1; // include NUL
+ size_t msg_len = msg.size() + 1; // include NUL
if (errbuf_size > 0 && errbuf) {
size_t copy_len = msg_len < errbuf_size ? msg_len : errbuf_size;
- inline_memcpy(errbuf, msg, copy_len - 1);
+ inline_memcpy(errbuf, msg.data(), copy_len - 1);
errbuf[copy_len - 1] = '\0';
}
// POSIX requires returning the size needed to hold the full NUL-terminated
diff --git a/libc/test/src/regex/regerror_test.cpp b/libc/test/src/regex/regerror_test.cpp
index 99bd2f71f1301..d7010aa0b9fc8 100644
--- a/libc/test/src/regex/regerror_test.cpp
+++ b/libc/test/src/regex/regerror_test.cpp
@@ -32,6 +32,7 @@ TEST(LlvmLibcRegexTest, RegerrorTruncation) {
char buf[5];
size_t needed =
LIBC_NAMESPACE::regerror(REG_NOMATCH, nullptr, buf, sizeof(buf));
+ ASSERT_EQ(needed, size_t(9)); // strlen("No match") + 1
ASSERT_STREQ("No m", buf); // "No match" truncated to 5 bytes (including NUL)
}
diff --git a/libc/test/src/regex/regex_basic_test.cpp b/libc/test/src/regex/regex_basic_test.cpp
index ae5ad42a138e5..fc598227665cc 100644
--- a/libc/test/src/regex/regex_basic_test.cpp
+++ b/libc/test/src/regex/regex_basic_test.cpp
@@ -66,3 +66,12 @@ TEST(LlvmLibcRegexTest, ExactMatch) {
ASSERT_EQ(0, LIBC_NAMESPACE::regexec(&preg, "test", 0, nullptr, 0));
LIBC_NAMESPACE::regfree(&preg);
}
+
+TEST(LlvmLibcRegexTest, NullByteStopsParsing) {
+ regex_t preg; // Passed to regcomp, regexec and regfree below.
+ ASSERT_EQ(0, // regcomp must return 0 for the pattern "match".
+ LIBC_NAMESPACE::regcomp(&preg, "match", REG_EXTENDED | REG_NOSUB));
+ ASSERT_EQ(REG_NOMATCH, // "match" occurs only after the embedded NUL.
+ LIBC_NAMESPACE::regexec(&preg, "doesn't \0 match", 0, nullptr, 0));
+ LIBC_NAMESPACE::regfree(&preg);
+}
More information about the libc-commits
mailing list