[libc] [llvm] Refactor WIDE_READ to allow finer control over high-performance function selection (PR #165613)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 20 16:00:03 PST 2025
https://github.com/Sterling-Augustine updated https://github.com/llvm/llvm-project/pull/165613
>From d7b5e8b22ce2a0bfb7740d4124f836ec0d7dfc55 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Thu, 23 Oct 2025 09:58:48 -0700
Subject: [PATCH 01/11] Refactor WIDE_READ mechanism to allow finer control
over function selection
---
.../modules/LLVMLibCCompileOptionRules.cmake | 5 +-
libc/config/config.json | 11 +-
libc/config/linux/arm/config.json | 7 +-
libc/config/linux/config.json | 7 +-
libc/config/linux/riscv/config.json | 7 +-
libc/docs/configure.rst | 3 +-
.../memory_utils/aarch64/inline_strlen.h | 6 +-
.../memory_utils/generic/inline_strlen.h | 5 +-
.../memory_utils/x86_64/inline_strlen.h | 14 +-
libc/src/string/string_utils.h | 138 +++++++++++-------
.../libc/libc_configure_options.bzl | 3 +-
11 files changed, 126 insertions(+), 80 deletions(-)
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index 4e9a9b66a63a7..f4e2a62d14b31 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -81,9 +81,8 @@ function(_get_compile_options_from_config output_var)
list(APPEND config_options "-DLIBC_QSORT_IMPL=${LIBC_CONF_QSORT_IMPL}")
endif()
- if(LIBC_CONF_STRING_UNSAFE_WIDE_READ)
- list(APPEND config_options "-DLIBC_COPT_STRING_UNSAFE_WIDE_READ")
- endif()
+ list(APPEND config_options "-DLIBC_COPT_STRING_LENGTH_IMPL=${LIBC_CONF_STRING_LENGTH_IMPL}")
+ list(APPEND config_options "-DLIBC_COPT_FIND_FIRST_CHARACTER_IMPL=${LIBC_CONF_FIND_FIRST_CHARACTER_IMPL}")
if(LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING)
list(APPEND config_options "-DLIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING")
diff --git a/libc/config/config.json b/libc/config/config.json
index cfbe9a43948ea..12596a00911e2 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -40,6 +40,7 @@
"value": false,
"doc": "Use an alternative printf float implementation based on 320-bit floats"
},
+
"LIBC_CONF_PRINTF_DISABLE_FIXED_POINT": {
"value": false,
"doc": "Disable printing fixed point values in printf and friends."
@@ -64,9 +65,13 @@
}
},
"string": {
- "LIBC_CONF_STRING_UNSAFE_WIDE_READ": {
- "value": false,
- "doc": "Read more than a byte at a time to perform byte-string operations like strlen."
+ "LIBC_CONF_STRING_LENGTH_IMPL": {
+ "value": "element",
+ "doc": "Selects the implementation for string-length: 'element', 'wide', 'generic' (vector), or 'arch'."
+ },
+ "LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": {
+ "value": "element",
+ "doc": "Selects the implementation for find-first-character-related functions: 'element', 'wide', 'generic' (vector), or 'arch'."
},
"LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING": {
"value": false,
diff --git a/libc/config/linux/arm/config.json b/libc/config/linux/arm/config.json
index e7ad4544b104d..caa16744d389f 100644
--- a/libc/config/linux/arm/config.json
+++ b/libc/config/linux/arm/config.json
@@ -1,7 +1,10 @@
{
"string": {
- "LIBC_CONF_STRING_UNSAFE_WIDE_READ": {
- "value": false
+ "LIBC_CONF_STRING_LENGTH_IMPL": {
+ "value": "element"
+ },
+ "LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": {
+ "value": "element"
}
}
}
diff --git a/libc/config/linux/config.json b/libc/config/linux/config.json
index 30e8b2cdadabe..93f5a1ef1f184 100644
--- a/libc/config/linux/config.json
+++ b/libc/config/linux/config.json
@@ -1,7 +1,10 @@
{
"string": {
- "LIBC_CONF_STRING_UNSAFE_WIDE_READ": {
- "value": true
+ "LIBC_CONF_STRING_LENGTH_IMPL": {
+ "value": "generic",
+ },
+ "LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": {
+ "value": "wide",
}
}
}
diff --git a/libc/config/linux/riscv/config.json b/libc/config/linux/riscv/config.json
index e7ad4544b104d..caa16744d389f 100644
--- a/libc/config/linux/riscv/config.json
+++ b/libc/config/linux/riscv/config.json
@@ -1,7 +1,10 @@
{
"string": {
- "LIBC_CONF_STRING_UNSAFE_WIDE_READ": {
- "value": false
+ "LIBC_CONF_STRING_LENGTH_IMPL": {
+ "value": "element"
+ },
+ "LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": {
+ "value": "element"
}
}
}
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index e23fc824ce7c8..3049738aff6e7 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -58,8 +58,9 @@ to learn about the defaults for your platform and target.
* **"setjmp" options**
- ``LIBC_CONF_SETJMP_AARCH64_RESTORE_PLATFORM_REGISTER``: Make setjmp save the value of x18, and longjmp restore it. The AArch64 ABI delegates this register to platform ABIs, which can choose whether to make it caller-saved.
* **"string" options**
+ - ``LIBC_CONF_FIND_FIRST_CHARACTER_IMPL``: Selects the implementation for find-first-character-related functions: 'element', 'wide', 'generic' (vector), or 'arch'.
- ``LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING``: Inserts prefetch for write instructions (PREFETCHW) for memset on x86 to recover performance when hardware prefetcher is disabled.
- - ``LIBC_CONF_STRING_UNSAFE_WIDE_READ``: Read more than a byte at a time to perform byte-string operations like strlen.
+ - ``LIBC_CONF_STRING_LENGTH_IMPL``: Selects the implementation for string-length: 'element', 'wide', 'generic' (vector), or 'arch'.
* **"threads" options**
- ``LIBC_CONF_THREAD_MODE``: The implementation used for Mutex, acceptable values are LIBC_THREAD_MODE_PLATFORM, LIBC_THREAD_MODE_SINGLE, and LIBC_THREAD_MODE_EXTERNAL.
* **"time" options**
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index 87f5ccdd56e23..b39df3e474669 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -16,7 +16,7 @@
namespace LIBC_NAMESPACE_DECL {
-namespace neon {
+namespace arch {
[[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t
string_length(const char *src) {
using Vector __attribute__((may_alias)) = uint8x8_t;
@@ -44,9 +44,7 @@ string_length(const char *src) {
(cpp::countr_zero(cmp) >> 3));
}
}
-} // namespace neon
-
-namespace string_length_impl = neon;
+} // namespace arch
} // namespace LIBC_NAMESPACE_DECL
#endif // __ARM_NEON
diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h
index 69700e801bcea..7630c0b7caedf 100644
--- a/libc/src/string/memory_utils/generic/inline_strlen.h
+++ b/libc/src/string/memory_utils/generic/inline_strlen.h
@@ -14,7 +14,7 @@
#include "src/__support/common.h"
namespace LIBC_NAMESPACE_DECL {
-namespace internal {
+namespace generic {
// Exploit the underlying integer representation to do a variable shift.
LIBC_INLINE constexpr cpp::simd_mask<char> shift_mask(cpp::simd_mask<char> m,
@@ -46,9 +46,8 @@ LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE size_t string_length(const char *src) {
cpp::find_first_set(mask);
}
}
-} // namespace internal
+} // namespace generic
-namespace string_length_impl = internal;
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H
diff --git a/libc/src/string/memory_utils/x86_64/inline_strlen.h b/libc/src/string/memory_utils/x86_64/inline_strlen.h
index 9e10d58363393..3d93960605f0c 100644
--- a/libc/src/string/memory_utils/x86_64/inline_strlen.h
+++ b/libc/src/string/memory_utils/x86_64/inline_strlen.h
@@ -15,7 +15,8 @@
namespace LIBC_NAMESPACE_DECL {
-namespace string_length_internal {
+namespace internal::arch {
+
// Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
template <typename Vector, typename Mask>
LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static Mask
@@ -92,15 +93,18 @@ namespace avx512 {
}
} // namespace avx512
#endif
-} // namespace string_length_internal
+[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
#if defined(__AVX512F__)
-namespace string_length_impl = string_length_internal::avx512;
+ return avx512::string_length(src);
#elif defined(__AVX2__)
-namespace string_length_impl = string_length_internal::avx2;
+ return avx2::string_length(src);
#else
-namespace string_length_impl = string_length_internal::sse2;
+ return sse2::string_length(src);
#endif
+}
+
+} // namespace internal::arch
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 7feef56fb3676..b9f020ca9097c 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -24,21 +24,56 @@
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
#include "src/string/memory_utils/inline_memcpy.h"
-#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
#if LIBC_HAS_VECTOR_TYPE
#include "src/string/memory_utils/generic/inline_strlen.h"
-#elif defined(LIBC_TARGET_ARCH_IS_X86)
+#endif
+#if defined(LIBC_TARGET_ARCH_IS_X86)
#include "src/string/memory_utils/x86_64/inline_strlen.h"
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON)
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
#include "src/string/memory_utils/aarch64/inline_strlen.h"
-#else
-namespace string_length_impl = LIBC_NAMESPACE::wide_read;
#endif
-#endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
namespace LIBC_NAMESPACE_DECL {
namespace internal {
+#if !LIBC_HAS_VECTOR_TYPE
+// Foreword any generic vector impls to architecture specific ones
+namespace arch {}
+namespace generic = arch;
+#endif
+
+namespace element {
+// Element-by-element (usually a byte, but wider for wchar) implementations of
+// functions that search for data. Slow, but easy to understand and analyze.
+
+// Returns the length of a string, denoted by the first occurrence
+// of a null terminator.
+LIBC_INLINE size_t string_length(const char *src) {
+ size_t length;
+ for (length = 0; *src; ++src, ++length)
+ ;
+ return length;
+}
+
+template <typename T> LIBC_INLINE size_t string_length_element(const T *src) {
+ size_t length;
+ for (length = 0; *src; ++src, ++length)
+ ;
+ return length;
+}
+
+LIBC_INLINE void *find_first_character(const unsigned char *src,
+ unsigned char ch, size_t n) {
+ for (; n && *src != ch; --n, ++src)
+ ;
+ return n ? const_cast<unsigned char *>(src) : nullptr;
+}
+} // namespace element
+
+namespace wide {
+// Generic, non-vector, implementations of functions that search for data
+// by reading from memory block-by-block.
+
template <typename Word> LIBC_INLINE constexpr Word repeat_byte(Word byte) {
static_assert(CHAR_BIT == 8, "repeat_byte assumes a byte is 8 bits.");
constexpr size_t BITS_IN_BYTE = CHAR_BIT;
@@ -74,8 +109,13 @@ template <typename Word> LIBC_INLINE constexpr bool has_zeroes(Word block) {
return (subtracted & inverted & HIGH_BITS) != 0;
}
-template <typename Word>
-LIBC_INLINE size_t string_length_wide_read(const char *src) {
+// Unsigned int is the default size for most processors, and on x86-64 it
+// performs better than larger sizes when the src pointer can't be assumed to
+// be aligned to a word boundary, so it's the size we use for reading the
+// string a block at a time.
+
+LIBC_INLINE size_t string_length(const char *src) {
+ using Word = unsigned int;
const char *char_ptr = src;
// Step 1: read 1 byte at a time to align to block size
for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0;
@@ -95,37 +135,23 @@ LIBC_INLINE size_t string_length_wide_read(const char *src) {
return static_cast<size_t>(char_ptr - src);
}
-namespace wide_read {
-LIBC_INLINE size_t string_length(const char *src) {
- // Unsigned int is the default size for most processors, and on x86-64 it
- // performs better than larger sizes when the src pointer can't be assumed to
- // be aligned to a word boundary, so it's the size we use for reading the
- // string a block at a time.
- return string_length_wide_read<unsigned int>(src);
-}
-
-} // namespace wide_read
-
-// Returns the length of a string, denoted by the first occurrence
-// of a null terminator.
-template <typename T> LIBC_INLINE size_t string_length(const T *src) {
-#ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ
- if constexpr (cpp::is_same_v<T, char>)
- return string_length_impl::string_length(src);
-#endif
- size_t length;
- for (length = 0; *src; ++src, ++length)
- ;
- return length;
-}
-
-template <typename Word>
LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE void *
-find_first_character_wide_read(const unsigned char *src, unsigned char ch,
- size_t n) {
+find_first_character(const unsigned char *src, unsigned char ch,
+ size_t max_strlen = cpp::numeric_limits<size_t>::max()) {
+ using Word = unsigned int;
const unsigned char *char_ptr = src;
size_t cur = 0;
+ // If the maximum size of the string is small, the overhead of aligning to a
+ // word boundary and generating a bitmask of the appropriate size may be
+ // greater than the gains from reading larger chunks. Based on some testing,
+ // the crossover point between when it's faster to just read bytewise and read
+ // blocks is somewhere between 16 and 32, so 4 times the size of the block
+ // should be in that range.
+ if (max_strlen < (sizeof(Word) * 4)) {
+ return element::find_first_character(src, ch, max_strlen);
+ }
+ size_t n = max_strlen;
// Step 1: read 1 byte at a time to align to block size
for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0 && cur < n;
++char_ptr, ++cur) {
@@ -153,31 +179,35 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch,
return const_cast<unsigned char *>(char_ptr);
}
-LIBC_INLINE void *find_first_character_byte_read(const unsigned char *src,
- unsigned char ch, size_t n) {
- for (; n && *src != ch; --n, ++src)
- ;
- return n ? const_cast<unsigned char *>(src) : nullptr;
+} // namespace wide
+
+// Dispatch mechanism for implementations of performance-sensitive
+// functions. Always measure, but generally from lower- to higher-performance
+// order:
+//
+// 1. element - read char-by-char or wchar-by-wchar
+// 3. wide - read word-by-word
+// 3. generic - read using clang's internal vector types
+// 4. arch - hand-coded per architecture. Possibly in asm, or with intrinsics.
+//
+//The called implemenation is chosen at build-time by setting
+// LIBC_CONF_{FUNC}_IMPL in config.json
+static constexpr auto &string_length_impl =
+ LIBC_COPT_STRING_LENGTH_IMPL::string_length;
+static constexpr auto &find_first_character_impl =
+ LIBC_COPT_FIND_FIRST_CHARACTER_IMPL::find_first_character;
+
+template <typename T> LIBC_INLINE size_t string_length(const T *src) {
+ if constexpr (cpp::is_same_v<T, char>)
+ return string_length_impl(src);
+ return element::string_length_element<T>(src);
}
// Returns the first occurrence of 'ch' within the first 'n' characters of
// 'src'. If 'ch' is not found, returns nullptr.
LIBC_INLINE void *find_first_character(const unsigned char *src,
unsigned char ch, size_t max_strlen) {
-#ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ
- // If the maximum size of the string is small, the overhead of aligning to a
- // word boundary and generating a bitmask of the appropriate size may be
- // greater than the gains from reading larger chunks. Based on some testing,
- // the crossover point between when it's faster to just read bytewise and read
- // blocks is somewhere between 16 and 32, so 4 times the size of the block
- // should be in that range.
- // Unsigned int is used for the same reason as in strlen.
- using BlockType = unsigned int;
- if (max_strlen > (sizeof(BlockType) * 4)) {
- return find_first_character_wide_read<BlockType>(src, ch, max_strlen);
- }
-#endif
- return find_first_character_byte_read(src, ch, max_strlen);
+ return find_first_character_impl(src, ch, max_strlen);
}
// Returns the maximum length span that contains only characters not found in
diff --git a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
index 259d4d292fcf4..6166f52f80f8b 100644
--- a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
+++ b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
@@ -39,7 +39,8 @@ LIBC_CONFIGURE_OPTIONS = [
# "LIBC_COPT_SCANF_DISABLE_FLOAT",
# "LIBC_COPT_SCANF_DISABLE_INDEX_MODE",
"LIBC_COPT_STDIO_USE_SYSTEM_FILE",
- "LIBC_COPT_STRING_UNSAFE_WIDE_READ",
+ "LIBC_COPT_STRING_LENGTH_IMPL=generic",
+ "LIBC_COPT_FIND_FIRST_CHARACTER_IMPL=wide",
# "LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH",
# "LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE",
# "LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION",
>From 5df3ff461f3c33c5ddd678f986feb06f58339661 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Wed, 29 Oct 2025 12:01:35 -0700
Subject: [PATCH 02/11] Fix formatting
---
libc/src/string/string_utils.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index b9f020ca9097c..833879795a236 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -190,7 +190,7 @@ find_first_character(const unsigned char *src, unsigned char ch,
// 3. generic - read using clang's internal vector types
// 4. arch - hand-coded per architecture. Possibly in asm, or with intrinsics.
//
-//The called implemenation is chosen at build-time by setting
+// The called implemenation is chosen at build-time by setting
// LIBC_CONF_{FUNC}_IMPL in config.json
static constexpr auto &string_length_impl =
LIBC_COPT_STRING_LENGTH_IMPL::string_length;
>From 051f4ab0d3b5ed2078427723745e40fb5fe1c7e5 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Fri, 31 Oct 2025 13:19:05 -0700
Subject: [PATCH 03/11] Fix aarch64 build.
---
libc/src/string/memory_utils/aarch64/inline_strlen.h | 4 ++--
libc/src/string/string_utils.h | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index b39df3e474669..f7385ce75a813 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -16,7 +16,7 @@
namespace LIBC_NAMESPACE_DECL {
-namespace arch {
+namespace internal::arch {
[[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t
string_length(const char *src) {
using Vector __attribute__((may_alias)) = uint8x8_t;
@@ -44,7 +44,7 @@ string_length(const char *src) {
(cpp::countr_zero(cmp) >> 3));
}
}
-} // namespace arch
+} // namespace internal::arch
} // namespace LIBC_NAMESPACE_DECL
#endif // __ARM_NEON
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 833879795a236..f4cecc5973978 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -37,7 +37,7 @@ namespace LIBC_NAMESPACE_DECL {
namespace internal {
#if !LIBC_HAS_VECTOR_TYPE
-// Foreword any generic vector impls to architecture specific ones
+// Forward any generic vector impls to architecture specific ones
namespace arch {}
namespace generic = arch;
#endif
>From 303929b6307301611b3662d1efd80e74da9f6f42 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Tue, 18 Nov 2025 13:52:17 -0800
Subject: [PATCH 04/11] Rename namespaces to match review suggestions.
---
libc/config/config.json | 4 ++--
libc/config/linux/config.json | 4 ++--
libc/docs/configure.rst | 4 ++--
.../memory_utils/aarch64/inline_strlen.h | 4 ++--
.../memory_utils/generic/inline_strlen.h | 4 ++--
.../memory_utils/x86_64/inline_strlen.h | 4 ++--
libc/src/string/string_utils.h | 21 ++++++++++---------
.../libc/libc_configure_options.bzl | 4 ++--
8 files changed, 25 insertions(+), 24 deletions(-)
diff --git a/libc/config/config.json b/libc/config/config.json
index 12596a00911e2..71384c98083c2 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -67,11 +67,11 @@
"string": {
"LIBC_CONF_STRING_LENGTH_IMPL": {
"value": "element",
- "doc": "Selects the implementation for string-length: 'element', 'wide', 'generic' (vector), or 'arch'."
+ "doc": "Selects the implementation for string-length: 'element', 'word', 'clang_vector', or 'arch_vector'."
},
"LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": {
"value": "element",
- "doc": "Selects the implementation for find-first-character-related functions: 'element', 'wide', 'generic' (vector), or 'arch'."
+ "doc": "Selects the implementation for find-first-character-related functions: 'element', 'word', 'clang_vector', or 'arch_vector'."
},
"LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING": {
"value": false,
diff --git a/libc/config/linux/config.json b/libc/config/linux/config.json
index 93f5a1ef1f184..8e7db248dc1bd 100644
--- a/libc/config/linux/config.json
+++ b/libc/config/linux/config.json
@@ -1,10 +1,10 @@
{
"string": {
"LIBC_CONF_STRING_LENGTH_IMPL": {
- "value": "generic",
+ "value": "clang_vector"
},
"LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": {
- "value": "wide",
+ "value": "word"
}
}
}
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index 3049738aff6e7..ff1f82f932521 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -58,9 +58,9 @@ to learn about the defaults for your platform and target.
* **"setjmp" options**
- ``LIBC_CONF_SETJMP_AARCH64_RESTORE_PLATFORM_REGISTER``: Make setjmp save the value of x18, and longjmp restore it. The AArch64 ABI delegates this register to platform ABIs, which can choose whether to make it caller-saved.
* **"string" options**
- - ``LIBC_CONF_FIND_FIRST_CHARACTER_IMPL``: Selects the implementation for find-first-character-related functions: 'element', 'wide', 'generic' (vector), or 'arch'.
+ - ``LIBC_CONF_FIND_FIRST_CHARACTER_IMPL``: Selects the implementation for find-first-character-related functions: 'element', 'word', 'clang_vector', or 'arch_vector'.
- ``LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING``: Inserts prefetch for write instructions (PREFETCHW) for memset on x86 to recover performance when hardware prefetcher is disabled.
- - ``LIBC_CONF_STRING_LENGTH_IMPL``: Selects the implementation for string-length: 'element', 'wide', 'generic' (vector), or 'arch'.
+ - ``LIBC_CONF_STRING_LENGTH_IMPL``: Selects the implementation for string-length: 'element', 'word', 'clang_vector', or 'arch_vector'.
* **"threads" options**
- ``LIBC_CONF_THREAD_MODE``: The implementation used for Mutex, acceptable values are LIBC_THREAD_MODE_PLATFORM, LIBC_THREAD_MODE_SINGLE, and LIBC_THREAD_MODE_EXTERNAL.
* **"time" options**
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index f7385ce75a813..78330fe0876c4 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -16,7 +16,7 @@
namespace LIBC_NAMESPACE_DECL {
-namespace internal::arch {
+namespace internal::arch_vector {
[[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t
string_length(const char *src) {
using Vector __attribute__((may_alias)) = uint8x8_t;
@@ -44,7 +44,7 @@ string_length(const char *src) {
(cpp::countr_zero(cmp) >> 3));
}
}
-} // namespace internal::arch
+} // namespace internal::arch_vector
} // namespace LIBC_NAMESPACE_DECL
#endif // __ARM_NEON
diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h
index 7630c0b7caedf..7a565b36617ed 100644
--- a/libc/src/string/memory_utils/generic/inline_strlen.h
+++ b/libc/src/string/memory_utils/generic/inline_strlen.h
@@ -14,7 +14,7 @@
#include "src/__support/common.h"
namespace LIBC_NAMESPACE_DECL {
-namespace generic {
+namespace clang_vector {
// Exploit the underlying integer representation to do a variable shift.
LIBC_INLINE constexpr cpp::simd_mask<char> shift_mask(cpp::simd_mask<char> m,
@@ -46,7 +46,7 @@ LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE size_t string_length(const char *src) {
cpp::find_first_set(mask);
}
}
-} // namespace generic
+} // namespace clang_vector
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/memory_utils/x86_64/inline_strlen.h b/libc/src/string/memory_utils/x86_64/inline_strlen.h
index 3d93960605f0c..07b4a470f0d77 100644
--- a/libc/src/string/memory_utils/x86_64/inline_strlen.h
+++ b/libc/src/string/memory_utils/x86_64/inline_strlen.h
@@ -15,7 +15,7 @@
namespace LIBC_NAMESPACE_DECL {
-namespace internal::arch {
+namespace internal::arch_vector {
// Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
template <typename Vector, typename Mask>
@@ -104,7 +104,7 @@ namespace avx512 {
#endif
}
-} // namespace internal::arch
+} // namespace internal::arch_vector
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index f4cecc5973978..7325bf10fb644 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -37,9 +37,9 @@ namespace LIBC_NAMESPACE_DECL {
namespace internal {
#if !LIBC_HAS_VECTOR_TYPE
-// Forward any generic vector impls to architecture specific ones
-namespace arch {}
-namespace generic = arch;
+// Forward any clang vector impls to architecture specific ones
+namespace arch_vector {}
+namespace clang_vector = arch_vector;
#endif
namespace element {
@@ -70,9 +70,9 @@ LIBC_INLINE void *find_first_character(const unsigned char *src,
}
} // namespace element
-namespace wide {
-// Generic, non-vector, implementations of functions that search for data
-// by reading from memory block-by-block.
+namespace word {
+// Non-vector, implementations of functions that search for data by reading from
+// memory word-by-word.
template <typename Word> LIBC_INLINE constexpr Word repeat_byte(Word byte) {
static_assert(CHAR_BIT == 8, "repeat_byte assumes a byte is 8 bits.");
@@ -179,16 +179,17 @@ find_first_character(const unsigned char *src, unsigned char ch,
return const_cast<unsigned char *>(char_ptr);
}
-} // namespace wide
+} // namespace word
// Dispatch mechanism for implementations of performance-sensitive
// functions. Always measure, but generally from lower- to higher-performance
// order:
//
// 1. element - read char-by-char or wchar-by-wchar
-// 3. wide - read word-by-word
-// 3. generic - read using clang's internal vector types
-// 4. arch - hand-coded per architecture. Possibly in asm, or with intrinsics.
+// 3. word - read word-by-word
+// 3. clang_vector - read using clang's internal vector types
+// 4. arch_vector - hand-coded per architecture. Possibly in asm, or with
+// intrinsics.
//
// The called implemenation is chosen at build-time by setting
// LIBC_CONF_{FUNC}_IMPL in config.json
diff --git a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
index 6166f52f80f8b..1c2b65ea20113 100644
--- a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
+++ b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
@@ -39,8 +39,8 @@ LIBC_CONFIGURE_OPTIONS = [
# "LIBC_COPT_SCANF_DISABLE_FLOAT",
# "LIBC_COPT_SCANF_DISABLE_INDEX_MODE",
"LIBC_COPT_STDIO_USE_SYSTEM_FILE",
- "LIBC_COPT_STRING_LENGTH_IMPL=generic",
- "LIBC_COPT_FIND_FIRST_CHARACTER_IMPL=wide",
+ "LIBC_COPT_STRING_LENGTH_IMPL=clang_vector",
+ "LIBC_COPT_FIND_FIRST_CHARACTER_IMPL=word",
# "LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH",
# "LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE",
# "LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION",
>From 0257a5d0edbc2913b89f71501f00444647afbc80 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Tue, 18 Nov 2025 15:15:27 -0800
Subject: [PATCH 05/11] Move optimization routines to string_optimization.h
---
libc/src/string/string_optimization.h | 212 ++++++++++++++++++++++++++
libc/src/string/string_utils.h | 191 +----------------------
2 files changed, 213 insertions(+), 190 deletions(-)
create mode 100644 libc/src/string/string_optimization.h
diff --git a/libc/src/string/string_optimization.h b/libc/src/string/string_optimization.h
new file mode 100644
index 0000000000000..f47ef5d7d8387
--- /dev/null
+++ b/libc/src/string/string_optimization.h
@@ -0,0 +1,212 @@
+//===-- String Optimization -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Basic implementation and dispatch mechanism for performance-sensitive string-
+// related code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/limits_macros.h"
+#include "hdr/stdint_proxy.h" // uintptr_t
+#include "hdr/types/size_t.h"
+#include "src/__support/CPP/type_traits.h" // cpp::is_same_v
+
+#if LIBC_HAS_VECTOR_TYPE
+#include "src/string/memory_utils/generic/inline_strlen.h"
+#endif
+#if defined(LIBC_TARGET_ARCH_IS_X86)
+#include "src/string/memory_utils/x86_64/inline_strlen.h"
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#include "src/string/memory_utils/aarch64/inline_strlen.h"
+#endif
+
+// Default both dispatch knobs to the portable element-by-element versions.
+#ifndef LIBC_COPT_STRING_LENGTH_IMPL
+#define LIBC_COPT_STRING_LENGTH_IMPL element
+#endif
+#ifndef LIBC_COPT_FIND_FIRST_CHARACTER_IMPL
+#define LIBC_COPT_FIND_FIRST_CHARACTER_IMPL element
+#endif
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+#if !LIBC_HAS_VECTOR_TYPE
+// Forward any clang vector impls to architecture specific ones
+namespace arch_vector {}
+namespace clang_vector = arch_vector;
+#endif
+
+namespace element {
+// Element-by-element (usually a byte, but wider for wchar) implementations of
+// functions that search for data. Slow, but easy to understand and analyze.
+
+// Returns the length of a string, denoted by the first occurrence
+// of a null terminator.
+LIBC_INLINE size_t string_length(const char *src) {
+ size_t length;
+ for (length = 0; *src; ++src, ++length)
+ ;
+ return length;
+}
+
+template <typename T> LIBC_INLINE size_t string_length_element(const T *src) {
+ size_t length;
+ for (length = 0; *src; ++src, ++length)
+ ;
+ return length;
+}
+
+LIBC_INLINE void *find_first_character(const unsigned char *src,
+ unsigned char ch, size_t n) {
+ for (; n && *src != ch; --n, ++src)
+ ;
+ return n ? const_cast<unsigned char *>(src) : nullptr;
+}
+} // namespace element
+
+namespace word {
+// Non-vector implementations of functions that search for data by reading from
+// memory word-by-word.
+
+template <typename Word> LIBC_INLINE constexpr Word repeat_byte(Word byte) {
+ static_assert(CHAR_BIT == 8, "repeat_byte assumes a byte is 8 bits.");
+ constexpr size_t BITS_IN_BYTE = CHAR_BIT;
+ constexpr size_t BYTE_MASK = 0xff;
+ Word result = 0;
+ byte = byte & BYTE_MASK;
+ for (size_t i = 0; i < sizeof(Word); ++i)
+ result = (result << BITS_IN_BYTE) | byte;
+ return result;
+}
+
+// Branch-free test for whether any byte of `block` is zero.
+// Subtracting 0x01 from every byte makes a zero byte wrap around to 0xff (or
+// 0xfe when a borrow from the byte below is taken as well), setting its high
+// bit. Masking each byte with 0x80 therefore keeps the zero bytes, plus any
+// byte that already had its high bit set along with some other bit.
+// AND-ing with the inverse of the original block removes the latter, because
+// a byte whose high bit was set to begin with has that bit clear in ~block.
+// What remains is non-zero if and only if the block contained a zero byte.
+//
+// Both masks must have type Word: a narrower LOW_BITS would only subtract
+// from the low bytes of a wider Word, and zero bytes in its upper half would
+// go undetected.
+//
+// For example, with a 32-bit Word, 0x41004243 yields true (its third byte is
+// zero) while 0x41424344 yields false.
+//
+template <typename Word> LIBC_INLINE constexpr bool has_zeroes(Word block) {
+  constexpr Word LOW_BITS = repeat_byte<Word>(0x01);
+  constexpr Word HIGH_BITS = repeat_byte<Word>(0x80);
+  Word subtracted = block - LOW_BITS;
+  Word inverted = ~block;
+  return (subtracted & inverted & HIGH_BITS) != 0;
+}
+
+// Unsigned int is the default size for most processors, and on x86-64 it
+// performs better than larger sizes when the src pointer can't be assumed to
+// be aligned to a word boundary, so it's the size we use for reading the
+// string a block at a time.
+
+LIBC_INLINE size_t string_length(const char *src) {
+ using Word = unsigned int;
+ const char *char_ptr = src;
+ // Step 1: read 1 byte at a time to align to block size
+ for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0;
+ ++char_ptr) {
+ if (*char_ptr == '\0')
+ return static_cast<size_t>(char_ptr - src);
+ }
+ // Step 2: read blocks
+ for (const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr);
+ !has_zeroes<Word>(*block_ptr); ++block_ptr) {
+ char_ptr = reinterpret_cast<const char *>(block_ptr);
+ }
+ // Step 3: find the zero in the block
+ for (; *char_ptr != '\0'; ++char_ptr) {
+ ;
+ }
+ return static_cast<size_t>(char_ptr - src);
+}
+
+// Returns a pointer to the first byte equal to 'ch' among the first
+// 'max_strlen' bytes of 'src', or nullptr if there is none. Whole words are
+// loaded, so bytes beyond the limit may be read (hence the sanitizer opt-out)
+// but are never reported as a match.
+LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE void *
+find_first_character(const unsigned char *src, unsigned char ch,
+                     size_t max_strlen = cpp::numeric_limits<size_t>::max()) {
+  using Word = unsigned int;
+  const unsigned char *char_ptr = src;
+  size_t cur = 0;
+
+  // For short inputs the cost of aligning and building the mask outweighs the
+  // benefit of wide reads; measurements put the crossover between 16 and 32
+  // bytes, i.e. around four words.
+  if (max_strlen < (sizeof(Word) * 4))
+    return element::find_first_character(src, ch, max_strlen);
+  const size_t n = max_strlen;
+
+  // Step 1: read 1 byte at a time to align to block size
+  for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0 && cur < n;
+       ++char_ptr, ++cur) {
+    if (*char_ptr == ch)
+      return const_cast<unsigned char *>(char_ptr);
+  }
+
+  const Word ch_mask = repeat_byte<Word>(ch);
+
+  // Step 2: skip whole blocks without a match. char_ptr is moved past each
+  // skipped block so that it stays exactly 'cur' bytes away from 'src'.
+  for (const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr);
+       cur < n && !has_zeroes<Word>((*block_ptr) ^ ch_mask);
+       ++block_ptr, cur += sizeof(Word)) {
+    char_ptr = reinterpret_cast<const unsigned char *>(block_ptr + 1);
+  }
+
+  // Step 3: find the match in the block, never looking at or past byte 'n'.
+  for (; cur < n && *char_ptr != ch; ++char_ptr, ++cur) {
+  }
+
+  if (cur >= n)
+    return static_cast<void *>(nullptr);
+  return const_cast<unsigned char *>(char_ptr);
+}
+
+} // namespace word
+
+// Dispatch mechanism for implementations of performance-sensitive
+// functions. Always measure, but generally from lower- to higher-performance
+// order:
+//
+// 1. element - read char-by-char or wchar-by-wchar
+// 2. word - read word-by-word
+// 3. clang_vector - read using clang's internal vector types
+// 4. arch_vector - hand-coded per architecture. Possibly in asm, or with
+// intrinsics.
+//
+// The called implementation is chosen at build-time by setting
+// LIBC_CONF_{FUNC}_IMPL in config.json
+static constexpr auto &string_length_impl =
+ LIBC_COPT_STRING_LENGTH_IMPL::string_length;
+static constexpr auto &find_first_character_impl =
+ LIBC_COPT_FIND_FIRST_CHARACTER_IMPL::find_first_character;
+
+template <typename T> LIBC_INLINE size_t string_length(const T *src) {
+ if constexpr (cpp::is_same_v<T, char>)
+ return string_length_impl(src);
+ return element::string_length_element<T>(src);
+}
+
+// Returns the first occurrence of 'ch' within the first 'n' characters of
+// 'src'. If 'ch' is not found, returns nullptr.
+LIBC_INLINE void *find_first_character(const unsigned char *src,
+ unsigned char ch, size_t max_strlen) {
+ return find_first_character_impl(src, ch, max_strlen);
+}
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 7325bf10fb644..262b11fc92ea0 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -14,202 +14,13 @@
#ifndef LLVM_LIBC_SRC_STRING_STRING_UTILS_H
#define LLVM_LIBC_SRC_STRING_STRING_UTILS_H
-#include "hdr/limits_macros.h"
-#include "hdr/stdint_proxy.h" // uintptr_t
#include "hdr/types/size_t.h"
#include "src/__support/CPP/bitset.h"
-#include "src/__support/CPP/type_traits.h" // cpp::is_same_v
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
#include "src/string/memory_utils/inline_memcpy.h"
-
-#if LIBC_HAS_VECTOR_TYPE
-#include "src/string/memory_utils/generic/inline_strlen.h"
-#endif
-#if defined(LIBC_TARGET_ARCH_IS_X86)
-#include "src/string/memory_utils/x86_64/inline_strlen.h"
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
-#include "src/string/memory_utils/aarch64/inline_strlen.h"
-#endif
-
-namespace LIBC_NAMESPACE_DECL {
-namespace internal {
-
-#if !LIBC_HAS_VECTOR_TYPE
-// Forward any clang vector impls to architecture specific ones
-namespace arch_vector {}
-namespace clang_vector = arch_vector;
-#endif
-
-namespace element {
-// Element-by-element (usually a byte, but wider for wchar) implementations of
-// functions that search for data. Slow, but easy to understand and analyze.
-
-// Returns the length of a string, denoted by the first occurrence
-// of a null terminator.
-LIBC_INLINE size_t string_length(const char *src) {
- size_t length;
- for (length = 0; *src; ++src, ++length)
- ;
- return length;
-}
-
-template <typename T> LIBC_INLINE size_t string_length_element(const T *src) {
- size_t length;
- for (length = 0; *src; ++src, ++length)
- ;
- return length;
-}
-
-LIBC_INLINE void *find_first_character(const unsigned char *src,
- unsigned char ch, size_t n) {
- for (; n && *src != ch; --n, ++src)
- ;
- return n ? const_cast<unsigned char *>(src) : nullptr;
-}
-} // namespace element
-
-namespace word {
-// Non-vector, implementations of functions that search for data by reading from
-// memory word-by-word.
-
-template <typename Word> LIBC_INLINE constexpr Word repeat_byte(Word byte) {
- static_assert(CHAR_BIT == 8, "repeat_byte assumes a byte is 8 bits.");
- constexpr size_t BITS_IN_BYTE = CHAR_BIT;
- constexpr size_t BYTE_MASK = 0xff;
- Word result = 0;
- byte = byte & BYTE_MASK;
- for (size_t i = 0; i < sizeof(Word); ++i)
- result = (result << BITS_IN_BYTE) | byte;
- return result;
-}
-
-// The goal of this function is to take in a block of arbitrary size and return
-// if it has any bytes equal to zero without branching. This is done by
-// transforming the block such that zero bytes become non-zero and non-zero
-// bytes become zero.
-// The first transformation relies on the properties of carrying in arithmetic
-// subtraction. Specifically, if 0x01 is subtracted from a byte that is 0x00,
-// then the result for that byte must be equal to 0xff (or 0xfe if the next byte
-// needs a carry as well).
-// The next transformation is a simple mask. All zero bytes will have the high
-// bit set after the subtraction, so each byte is masked with 0x80. This narrows
-// the set of bytes that result in a non-zero value to only zero bytes and bytes
-// with the high bit and any other bit set.
-// The final transformation masks the result of the previous transformations
-// with the inverse of the original byte. This means that any byte that had the
-// high bit set will no longer have it set, narrowing the list of bytes which
-// result in non-zero values to just the zero byte.
-template <typename Word> LIBC_INLINE constexpr bool has_zeroes(Word block) {
- constexpr unsigned int LOW_BITS = repeat_byte<Word>(0x01);
- constexpr Word HIGH_BITS = repeat_byte<Word>(0x80);
- Word subtracted = block - LOW_BITS;
- Word inverted = ~block;
- return (subtracted & inverted & HIGH_BITS) != 0;
-}
-
-// Unsigned int is the default size for most processors, and on x86-64 it
-// performs better than larger sizes when the src pointer can't be assumed to
-// be aligned to a word boundary, so it's the size we use for reading the
-// string a block at a time.
-
-LIBC_INLINE size_t string_length(const char *src) {
- using Word = unsigned int;
- const char *char_ptr = src;
- // Step 1: read 1 byte at a time to align to block size
- for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0;
- ++char_ptr) {
- if (*char_ptr == '\0')
- return static_cast<size_t>(char_ptr - src);
- }
- // Step 2: read blocks
- for (const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr);
- !has_zeroes<Word>(*block_ptr); ++block_ptr) {
- char_ptr = reinterpret_cast<const char *>(block_ptr);
- }
- // Step 3: find the zero in the block
- for (; *char_ptr != '\0'; ++char_ptr) {
- ;
- }
- return static_cast<size_t>(char_ptr - src);
-}
-
-LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE void *
-find_first_character(const unsigned char *src, unsigned char ch,
- size_t max_strlen = cpp::numeric_limits<size_t>::max()) {
- using Word = unsigned int;
- const unsigned char *char_ptr = src;
- size_t cur = 0;
-
- // If the maximum size of the string is small, the overhead of aligning to a
- // word boundary and generating a bitmask of the appropriate size may be
- // greater than the gains from reading larger chunks. Based on some testing,
- // the crossover point between when it's faster to just read bytewise and read
- // blocks is somewhere between 16 and 32, so 4 times the size of the block
- // should be in that range.
- if (max_strlen < (sizeof(Word) * 4)) {
- return element::find_first_character(src, ch, max_strlen);
- }
- size_t n = max_strlen;
- // Step 1: read 1 byte at a time to align to block size
- for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0 && cur < n;
- ++char_ptr, ++cur) {
- if (*char_ptr == ch)
- return const_cast<unsigned char *>(char_ptr);
- }
-
- const Word ch_mask = repeat_byte<Word>(ch);
-
- // Step 2: read blocks
- for (const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr);
- !has_zeroes<Word>((*block_ptr) ^ ch_mask) && cur < n;
- ++block_ptr, cur += sizeof(Word)) {
- char_ptr = reinterpret_cast<const unsigned char *>(block_ptr);
- }
-
- // Step 3: find the match in the block
- for (; *char_ptr != ch && cur < n; ++char_ptr, ++cur) {
- ;
- }
-
- if (*char_ptr != ch || cur >= n)
- return static_cast<void *>(nullptr);
-
- return const_cast<unsigned char *>(char_ptr);
-}
-
-} // namespace word
-
-// Dispatch mechanism for implementations of performance-sensitive
-// functions. Always measure, but generally from lower- to higher-performance
-// order:
-//
-// 1. element - read char-by-char or wchar-by-wchar
-// 3. word - read word-by-word
-// 3. clang_vector - read using clang's internal vector types
-// 4. arch_vector - hand-coded per architecture. Possibly in asm, or with
-// intrinsics.
-//
-// The called implemenation is chosen at build-time by setting
-// LIBC_CONF_{FUNC}_IMPL in config.json
-static constexpr auto &string_length_impl =
- LIBC_COPT_STRING_LENGTH_IMPL::string_length;
-static constexpr auto &find_first_character_impl =
- LIBC_COPT_FIND_FIRST_CHARACTER_IMPL::find_first_character;
-
-template <typename T> LIBC_INLINE size_t string_length(const T *src) {
- if constexpr (cpp::is_same_v<T, char>)
- return string_length_impl(src);
- return element::string_length_element<T>(src);
-}
-
-// Returns the first occurrence of 'ch' within the first 'n' characters of
-// 'src'. If 'ch' is not found, returns nullptr.
-LIBC_INLINE void *find_first_character(const unsigned char *src,
- unsigned char ch, size_t max_strlen) {
- return find_first_character_impl(src, ch, max_strlen);
-}
+#include "src/string/string_optimization.h"
// Returns the maximum length span that contains only characters not found in
// 'segment'. If no characters are found, returns the length of 'src'.
>From 232be6458c465d389ac8fd7fc0180d278b62740c Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Wed, 19 Nov 2025 15:55:29 -0800
Subject: [PATCH 06/11] Cleanup header-syntax, rename file.
---
.../{string_optimization.h => string_length.h} | 15 ++++++++-------
libc/src/string/string_utils.h | 13 ++++++++++++-
2 files changed, 20 insertions(+), 8 deletions(-)
rename libc/src/string/{string_optimization.h => string_length.h} (95%)
diff --git a/libc/src/string/string_optimization.h b/libc/src/string/string_length.h
similarity index 95%
rename from libc/src/string/string_optimization.h
rename to libc/src/string/string_length.h
index f47ef5d7d8387..80f8ca495a3a1 100644
--- a/libc/src/string/string_optimization.h
+++ b/libc/src/string/string_length.h
@@ -1,4 +1,4 @@
-//===-- String Optimization -------------------------------------*- C++ -*-===//
+//===-- String Length -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,6 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STRING_STRING_LENGTH_H
+#define LLVM_LIBC_SRC_STRING_STRING_LENGTH_H
+
#include "hdr/limits_macros.h"
#include "hdr/stdint_proxy.h" // uintptr_t
#include "hdr/types/size_t.h"
@@ -204,9 +207,7 @@ template <typename T> LIBC_INLINE size_t string_length(const T *src) {
return element::string_length_element<T>(src);
}
-// Returns the first occurrence of 'ch' within the first 'n' characters of
-// 'src'. If 'ch' is not found, returns nullptr.
-LIBC_INLINE void *find_first_character(const unsigned char *src,
- unsigned char ch, size_t max_strlen) {
- return find_first_character_impl(src, ch, max_strlen);
-}
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STRING_STRING_LENGTH_H
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 262b11fc92ea0..205a124fad906 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -20,7 +20,10 @@
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
#include "src/string/memory_utils/inline_memcpy.h"
-#include "src/string/string_optimization.h"
+#include "src/string/string_length.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
// Returns the maximum length span that contains only characters not found in
// 'segment'. If no characters are found, returns the length of 'src'.
@@ -114,6 +117,14 @@ LIBC_INLINE constexpr static char *strrchr_implementation(const char *src,
}
}
+
+// Returns the first occurrence of 'ch' within the first 'n' characters of
+// 'src'. If 'ch' is not found, returns nullptr.
+LIBC_INLINE void *find_first_character(const unsigned char *src,
+ unsigned char ch, size_t max_strlen) {
+ return find_first_character_impl(src, ch, max_strlen);
+}
+
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
>From 81edcb865c9d732181192f98fcee64d767839629 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Thu, 20 Nov 2025 15:43:38 -0800
Subject: [PATCH 07/11] Fix formatting and rework namespace names
---
.../memory_utils/aarch64/inline_strlen.h | 57 +++++++++----------
1 file changed, 28 insertions(+), 29 deletions(-)
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index 50fc751868b17..b0cd6518445f6 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -10,50 +10,49 @@
#include "src/__support/macros/properties/cpu_features.h"
+namespace LIBC_NAMESPACE_DECL {
+
+namespace internal::arch_vector {
#if defined(__ARM_NEON)
#include "src/__support/CPP/bit.h" // countr_zero
#include <arm_neon.h>
#include <stddef.h> // size_t
-namespace LIBC_NAMESPACE_DECL {
-
-namespace internal::arch_vector {
namepace neon {
-[[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t
-string_length(const char *src) {
- using Vector __attribute__((may_alias)) = uint8x8_t;
+ [[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t
+ string_length(const char *src) {
+ using Vector __attribute__((may_alias)) = uint8x8_t;
- uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
- const Vector *block_ptr =
- reinterpret_cast<const Vector *>(src - misalign_bytes);
- Vector v = *block_ptr;
- Vector vcmp = vceqz_u8(v);
- uint64x1_t cmp_mask = vreinterpret_u64_u8(vcmp);
- uint64_t cmp = vget_lane_u64(cmp_mask, 0);
- cmp = cmp >> (misalign_bytes << 3);
- if (cmp)
- return cpp::countr_zero(cmp) >> 3;
-
- while (true) {
- ++block_ptr;
- v = *block_ptr;
- vcmp = vceqz_u8(v);
- cmp_mask = vreinterpret_u64_u8(vcmp);
- cmp = vget_lane_u64(cmp_mask, 0);
+ uintptr_t misalign_bytes =
+ reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
+ const Vector *block_ptr =
+ reinterpret_cast<const Vector *>(src - misalign_bytes);
+ Vector v = *block_ptr;
+ Vector vcmp = vceqz_u8(v);
+ uint64x1_t cmp_mask = vreinterpret_u64_u8(vcmp);
+ uint64_t cmp = vget_lane_u64(cmp_mask, 0);
+ cmp = cmp >> (misalign_bytes << 3);
if (cmp)
- return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
- reinterpret_cast<uintptr_t>(src) +
- (cpp::countr_zero(cmp) >> 3));
+ return cpp::countr_zero(cmp) >> 3;
+
+ while (true) {
+ ++block_ptr;
+ v = *block_ptr;
+ vcmp = vceqz_u8(v);
+ cmp_mask = vreinterpret_u64_u8(vcmp);
+ cmp = vget_lane_u64(cmp_mask, 0);
+ if (cmp)
+ return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
+ reinterpret_cast<uintptr_t>(src) +
+ (cpp::countr_zero(cmp) >> 3));
+ }
}
-}
} // namespace neon
-} // namespace LIBC_NAMESPACE_DECL
#endif // __ARM_NEON
#ifdef LIBC_TARGET_CPU_HAS_SVE
#include "src/__support/macros/optimization.h"
#include <arm_sve.h>
-namespace LIBC_NAMESPACE_DECL {
namespace sve {
[[maybe_unused]] LIBC_INLINE static size_t string_length(const char *src) {
const uint8_t *ptr = reinterpret_cast<const uint8_t *>(src);
>From fb5c5ea02c9a4b6cf0c131f50e2702febb893986 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Thu, 20 Nov 2025 15:44:03 -0800
Subject: [PATCH 08/11] Fix bazel build
---
utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index bd48222856f22..5ccaadca427d1 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -5449,7 +5449,10 @@ libc_support_library(
libc_support_library(
name = "string_utils",
- hdrs = ["src/string/string_utils.h"],
+ hdrs = [
+ "src/string/string_utils.h",
+ "src/string/string_length.h",
+ ],
deps = [
":__support_common",
":__support_cpp_bitset",
>From db7168a2d3c34deb5250eeb4394f5e7f7e6ec09b Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Thu, 20 Nov 2025 15:48:49 -0800
Subject: [PATCH 09/11] More aarch64 fixes
---
libc/src/string/memory_utils/aarch64/inline_strlen.h | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index b0cd6518445f6..49db3df4863b2 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -95,15 +95,16 @@ namespace sve {
return len;
}
} // namespace sve
-} // internal::arch_vector
-} // namespace LIBC_NAMESPACE_DECL
#endif // LIBC_TARGET_CPU_HAS_SVE
-namespace LIBC_NAMESPACE_DECL {
+[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
#ifdef LIBC_TARGET_CPU_HAS_SVE
-namespace string_length_impl = sve;
+ return sve::string_length(src);
#elif defined(__ARM_NEON)
-namespace string_length_impl = neon;
+ return neon::string_length(src);
#endif
+}
+
+} // internal::arch_vector
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
>From 1f2d783be6f7499369f6e4efdb5a053906928db3 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Thu, 20 Nov 2025 15:52:50 -0800
Subject: [PATCH 10/11] Formatting fixes.
---
libc/src/string/memory_utils/aarch64/inline_strlen.h | 2 +-
libc/src/string/string_utils.h | 1 -
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index 49db3df4863b2..3101a55f75ccf 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -105,6 +105,6 @@ namespace sve {
#endif
}
-} // internal::arch_vector
+} // namespace internal::arch_vector
} // namespace LIBC_NAMESPACE_DECL
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 205a124fad906..b0144e01a9006 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -117,7 +117,6 @@ LIBC_INLINE constexpr static char *strrchr_implementation(const char *src,
}
}
-
// Returns the first occurrence of 'ch' within the first 'n' characters of
// 'src'. If 'ch' is not found, returns nullptr.
LIBC_INLINE void *find_first_character(const unsigned char *src,
>From 9fd2e2b75213c8546bd88af10c0b077c77bd6a01 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Thu, 20 Nov 2025 15:59:36 -0800
Subject: [PATCH 11/11] namepace --> namespace
---
libc/src/string/memory_utils/aarch64/inline_strlen.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index 3101a55f75ccf..a684600b3c3ef 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -17,7 +17,7 @@ namespace internal::arch_vector {
#include "src/__support/CPP/bit.h" // countr_zero
#include <arm_neon.h>
#include <stddef.h> // size_t
-namepace neon {
+namespace neon {
[[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t
string_length(const char *src) {
using Vector __attribute__((may_alias)) = uint8x8_t;
More information about the llvm-commits
mailing list