[libc-commits] [libc] [libc] Implement wide read strlen with LLVM vector type (PR #152605)
Joseph Huber via libc-commits
libc-commits at lists.llvm.org
Tue Aug 26 11:07:30 PDT 2025
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/152605
>From a997d6d093495ad7abdb42d111b088f5e70848e7 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Thu, 7 Aug 2025 17:25:34 -0500
Subject: [PATCH] [libc] Implement generic SIMD helper 'simd.h' and implement
strlen
Summary:
This PR introduces a new 'simd.h' header that implements an interface
similar to the proposed `stdx::simd` in C++. However, we instead wrap
around the LLVM internal type. This makes heavy use of the clang vector
extensions and boolean vectors, using primitive vector types
instead of a class (many benefits to this).
I use this interface to implement a generic strlen implementation, but
propose we use this for math. Right now this requires a feature only
introduced in clang-22.
---
libc/src/__support/CPP/CMakeLists.txt | 6 +
libc/src/__support/CPP/algorithm.h | 6 +
libc/src/__support/CPP/simd.h | 213 ++++++++++++++++++
libc/src/__support/macros/attributes.h | 6 +
.../macros/properties/cpu_features.h | 4 +
libc/src/string/CMakeLists.txt | 1 +
.../memory_utils/generic/inline_strlen.h | 52 +++++
libc/src/string/string_utils.h | 6 +-
8 files changed, 293 insertions(+), 1 deletion(-)
create mode 100644 libc/src/__support/CPP/simd.h
create mode 100644 libc/src/string/memory_utils/generic/inline_strlen.h
diff --git a/libc/src/__support/CPP/CMakeLists.txt b/libc/src/__support/CPP/CMakeLists.txt
index 8b65a8839ab21..a389a6d1702fe 100644
--- a/libc/src/__support/CPP/CMakeLists.txt
+++ b/libc/src/__support/CPP/CMakeLists.txt
@@ -210,3 +210,9 @@ add_object_library(
libc.src.__support.common
libc.src.__support.macros.properties.os
)
+
+add_header_library(
+ simd
+ HDRS
+ simd.h
+)
diff --git a/libc/src/__support/CPP/algorithm.h b/libc/src/__support/CPP/algorithm.h
index 7704b3fa81f0c..de0c47369d945 100644
--- a/libc/src/__support/CPP/algorithm.h
+++ b/libc/src/__support/CPP/algorithm.h
@@ -18,6 +18,12 @@
namespace LIBC_NAMESPACE_DECL {
namespace cpp {
+// Empty tag types, each named after a binary operation. They declare no call
+// operator; in this patch they only appear as parameter types that pick an
+// overload of cpp::reduce() in simd.h.
+template <class T = void> struct plus {};
+template <class T = void> struct multiplies {};
+template <class T = void> struct bit_and {};
+template <class T = void> struct bit_or {};
+template <class T = void> struct bit_xor {};
+
template <class T> LIBC_INLINE constexpr const T &max(const T &a, const T &b) {
return (a < b) ? b : a;
}
diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h
new file mode 100644
index 0000000000000..42b32e79e4772
--- /dev/null
+++ b/libc/src/__support/CPP/simd.h
@@ -0,0 +1,213 @@
+//===-- Portable SIMD library similar to stdx::simd -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a generic interface into fixed-size SIMD instructions
+// using the clang vector type. The API shares some similarities with the
+// stdx::simd proposal, but instead chooses to use vectors as primitive types
+// with several extra helper functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/stdint_proxy.h"
+#include "src/__support/CPP/algorithm.h"
+#include "src/__support/CPP/bit.h"
+#include "src/__support/CPP/type_traits/integral_constant.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/config.h"
+
+#include <stddef.h>
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_CPP_SIMD_H
+#define LLVM_LIBC_SRC___SUPPORT_CPP_SIMD_H
+
+namespace LIBC_NAMESPACE_DECL {
+namespace cpp {
+
+static_assert(LIBC_HAS_VECTOR_TYPE, "compiler does not support vector types");
+
+namespace internal {
+
+// Maps a size in bytes to the fixed-width unsigned integer type of that size.
+// Only 1, 2, 4 and 8 are specialized; the primary template is left undefined,
+// so asking for '::type' with any other size is a compile error.
+template <size_t Size> struct get_as_integer_type;
+
+template <> struct get_as_integer_type<1> {
+ using type = uint8_t;
+};
+template <> struct get_as_integer_type<2> {
+ using type = uint16_t;
+};
+template <> struct get_as_integer_type<4> {
+ using type = uint32_t;
+};
+template <> struct get_as_integer_type<8> {
+ using type = uint64_t;
+};
+
+// The unsigned integer type whose size equals sizeof(T).
+template <class T>
+using get_as_integer_type_t = typename get_as_integer_type<sizeof(T)>::type;
+
+// Default lane count for a vector of T, chosen from the target feature macros:
+// 64 bytes worth of T with AVX512F, 32 with AVX2, 16 with SSE2 or ARM NEON.
+// When none of those macros is defined the count falls back to a single lane.
+#if defined(LIBC_TARGET_CPU_HAS_AVX512F)
+template <typename T>
+inline constexpr size_t native_vector_size = 64 / sizeof(T);
+#elif defined(LIBC_TARGET_CPU_HAS_AVX2)
+template <typename T>
+inline constexpr size_t native_vector_size = 32 / sizeof(T);
+#elif defined(LIBC_TARGET_CPU_HAS_SSE2) || defined(LIBC_TARGET_CPU_HAS_ARM_NEON)
+template <typename T>
+inline constexpr size_t native_vector_size = 16 / sizeof(T);
+#else
+template <typename T> inline constexpr size_t native_vector_size = 1;
+#endif
+} // namespace internal
+
+// Type aliases.
+// fixed_size_simd<T, N>: clang extended vector of N elements of T; N must be
+// given explicitly.
+template <typename T, size_t N>
+using fixed_size_simd = T [[clang::ext_vector_type(N)]];
+// simd<T, N>: the same vector type, with N defaulting to
+// internal::native_vector_size<T>.
+template <typename T, size_t N = internal::native_vector_size<T>>
+using simd = T [[clang::ext_vector_type(N)]];
+// simd_mask<T>: vector of bool with internal::native_vector_size<T> lanes,
+// i.e. one lane per element of the default simd<T>.
+template <typename T>
+using simd_mask = simd<bool, internal::native_vector_size<T>>;
+
+// Type trait helpers.
+// simd_size<T>::value is the lane count N when T is an extended vector type
+// and 1 for every other type.
+template <typename T> struct simd_size : cpp::integral_constant<size_t, 1> {};
+template <typename T, unsigned N>
+struct simd_size<T [[clang::ext_vector_type(N)]]>
+ : cpp::integral_constant<size_t, N> {};
+template <class T> constexpr size_t simd_size_v = simd_size<T>::value;
+
+// is_simd<T>::value is true only when T is an extended vector type (of any
+// element type, including bool).
+template <typename T> struct is_simd : cpp::integral_constant<bool, false> {};
+template <typename T, unsigned N>
+struct is_simd<T [[clang::ext_vector_type(N)]]>
+ : cpp::integral_constant<bool, true> {};
+template <class T> constexpr bool is_simd_v = is_simd<T>::value;
+
+// is_simd_mask<T>::value is true only when T is an extended vector of bool.
+template <typename T>
+struct is_simd_mask : cpp::integral_constant<bool, false> {};
+template <unsigned N>
+struct is_simd_mask<bool [[clang::ext_vector_type(N)]]>
+ : cpp::integral_constant<bool, true> {};
+template <class T> constexpr bool is_simd_mask_v = is_simd_mask<T>::value;
+
+// Converts every lane of 'v' from From to To via __builtin_convertvector. The
+// result has the same lane count N as the input; 'To' must be spelled out by
+// the caller since it cannot be deduced.
+template <typename To, typename From, size_t N>
+LIBC_INLINE constexpr simd<To, N> simd_cast(simd<From, N> v) {
+ return __builtin_convertvector(v, simd<To, N>);
+}
+
+// SIMD mask operations.
+// True when every lane of 'm' is true (AND-reduction over the lanes).
+template <size_t N> LIBC_INLINE constexpr bool all_of(simd<bool, N> m) {
+ return __builtin_reduce_and(m);
+}
+// True when at least one lane of 'm' is true (OR-reduction over the lanes).
+template <size_t N> LIBC_INLINE constexpr bool any_of(simd<bool, N> m) {
+ return __builtin_reduce_or(m);
+}
+// True when no lane of 'm' is true; the negation of any_of().
+template <size_t N> LIBC_INLINE constexpr bool none_of(simd<bool, N> m) {
+ return !any_of(m);
+}
+// True when 'm' is mixed: at least one lane true and at least one lane false.
+template <size_t N> LIBC_INLINE constexpr bool some_of(simd<bool, N> m) {
+ return any_of(m) && !all_of(m);
+}
+// Result of __builtin_popcountg on the mask, i.e. the count of true lanes.
+template <size_t N> LIBC_INLINE constexpr int popcount(simd<bool, N> m) {
+ return __builtin_popcountg(m);
+}
+// Result of __builtin_ctzg on the mask: the zero-based position of the lowest
+// true lane. NOTE(review): no fallback value is passed to the builtin, so
+// callers presumably must check any_of(m) first -- confirm.
+template <size_t N> LIBC_INLINE constexpr int find_first_set(simd<bool, N> m) {
+ return __builtin_ctzg(m);
+}
+// Returns the zero-based position of the highest true lane of 'm', using the
+// same indexing as find_first_set(). No fallback value is passed to
+// __builtin_clzg, so the mask is expected to have at least one true lane
+// (callers should check any_of(m) first).
+template <size_t N> LIBC_INLINE constexpr int find_last_set(simd<bool, N> m) {
+  constexpr size_t size = simd_size_v<simd<bool, N>>;
+  // clzg counts the false lanes above the highest true one, so the index of
+  // that lane is (size - 1) - clz. Omitting the '- 1' would report 'size',
+  // one past the last valid lane, whenever the top lane is set.
+  return static_cast<int>(size - 1 - __builtin_clzg(m));
+}
+
+// Elementwise operations.
+// Lane-by-lane minimum of 'x' and 'y'; both inputs and the result share the
+// same element type and lane count.
+template <typename T, size_t N>
+LIBC_INLINE constexpr simd<T, N> min(simd<T, N> x, simd<T, N> y) {
+ return __builtin_elementwise_min(x, y);
+}
+// Lane-by-lane maximum of 'x' and 'y'.
+template <typename T, size_t N>
+LIBC_INLINE constexpr simd<T, N> max(simd<T, N> x, simd<T, N> y) {
+ return __builtin_elementwise_max(x, y);
+}
+
+// Reduction operations.
+// Generic entry point: folds all lanes of 'v' into one scalar using the
+// operation named by the tag 'op' (cpp::plus<> when omitted). It only forwards
+// to the tag-specific overloads declared below.
+// NOTE(review): for an Op type that has no dedicated overload the call below
+// appears to resolve back to this same template and recurse -- confirm and
+// consider rejecting unsupported tags at compile time.
+template <typename T, size_t N, typename Op = cpp::plus<>>
+LIBC_INLINE constexpr T reduce(simd<T, N> v, Op op = {}) {
+ return reduce(v, op);
+}
+// Sum of all lanes.
+template <typename T, size_t N>
+LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::plus<>) {
+ return __builtin_reduce_add(v);
+}
+// Product of all lanes.
+template <typename T, size_t N>
+LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::multiplies<>) {
+ return __builtin_reduce_mul(v);
+}
+// Bitwise AND of all lanes.
+template <typename T, size_t N>
+LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_and<>) {
+ return __builtin_reduce_and(v);
+}
+// Bitwise OR of all lanes.
+template <typename T, size_t N>
+LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_or<>) {
+ return __builtin_reduce_or(v);
+}
+// Bitwise XOR of all lanes.
+template <typename T, size_t N>
+LIBC_INLINE constexpr T reduce(simd<T, N> v, cpp::bit_xor<>) {
+ return __builtin_reduce_xor(v);
+}
+// Horizontal minimum: the smallest lane of 'v'.
+template <typename T, size_t N> LIBC_INLINE constexpr T hmin(simd<T, N> v) {
+ return __builtin_reduce_min(v);
+}
+// Horizontal maximum: the largest lane of 'v'.
+template <typename T, size_t N> LIBC_INLINE constexpr T hmax(simd<T, N> v) {
+ return __builtin_reduce_max(v);
+}
+
+// Accessor helpers.
+// Reads a T from 'ptr' with no alignment requirement by copying sizeof(T)
+// bytes into a local and returning it.
+template <typename T> LIBC_INLINE T load_unaligned(const void *ptr) {
+ T tmp;
+ __builtin_memcpy(&tmp, ptr, sizeof(T));
+ return tmp;
+}
+// Reads a T from 'ptr' through a direct dereference. The pointer is passed
+// through __builtin_assume_aligned, so the caller must guarantee that it is
+// aligned to alignof(T).
+template <typename T> LIBC_INLINE T load_aligned(const void *ptr) {
+ return *reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignof(T)));
+}
+// Writes 'v' to 'ptr' with no alignment requirement by copying sizeof(T)
+// bytes.
+template <typename T> LIBC_INLINE void store_unaligned(T v, void *ptr) {
+ __builtin_memcpy(ptr, &v, sizeof(T));
+}
+// Writes 'v' to 'ptr' through a direct dereference. The pointer is passed
+// through __builtin_assume_aligned, so the caller must guarantee that it is
+// aligned to alignof(T). Returns nothing, like store_unaligned(); a non-void
+// return type here would let control flow off the end of the function, which
+// is undefined behaviour.
+template <typename T> LIBC_INLINE void store_aligned(T v, void *ptr) {
+  *reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignof(T))) = v;
+}
+
+// Construction helpers.
+// Builds an N-lane vector from the scalar 'v' by converting it to simd<T, N>
+// (a broadcast of 'v' to every lane).
+template <typename T, size_t N> LIBC_INLINE constexpr simd<T, N> splat(T v) {
+ return simd<T, N>(v);
+}
+// Same as above, using the default lane count of simd<T>.
+template <typename T> LIBC_INLINE constexpr simd<T> splat(T v) {
+ return splat<T, simd_size_v<simd<T>>>(v);
+}
+// Builds an N-lane arithmetic sequence: lane i holds base + i * step, with the
+// index converted to T before the multiplication.
+template <typename T, unsigned N>
+LIBC_INLINE constexpr simd<T, N> iota(T base = T(0), T step = T(1)) {
+ fixed_size_simd<T, N> v{};
+ for (unsigned i = 0; i < N; ++i)
+ v[i] = base + T(i) * step;
+ return v;
+}
+// Same as above, using the default lane count of simd<T>.
+template <typename T>
+LIBC_INLINE constexpr simd<T> iota(T base = T(0), T step = T(1)) {
+ return iota<T, simd_size_v<simd<T>>>(base, step);
+}
+
+// Conditional helpers.
+// Lane-wise choice between two vectors: applies the conditional operator with
+// the boolean vector 'm' as the condition, taking each lane from 'x' where the
+// matching lane of 'm' is true and from 'y' where it is false.
+template <typename T, size_t N>
+LIBC_INLINE constexpr simd<T, N> select(simd<bool, N> m, simd<T, N> x,
+ simd<T, N> y) {
+ return m ? x : y;
+}
+
+// TODO: where expressions, scalar overloads, ABI types.
+
+} // namespace cpp
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif
diff --git a/libc/src/__support/macros/attributes.h b/libc/src/__support/macros/attributes.h
index 4ff374b0e4fbd..d350a06125f00 100644
--- a/libc/src/__support/macros/attributes.h
+++ b/libc/src/__support/macros/attributes.h
@@ -73,4 +73,10 @@ LIBC_THREAD_MODE_EXTERNAL.
#define LIBC_PREFERED_TYPE(TYPE)
#endif
+// LIBC_HAS_VECTOR_TYPE is 1 only when the compiler reports both the
+// ext_vector_type attribute and the ext_vector_type_boolean feature, and 0
+// otherwise. It is always defined, so it can be tested with a plain '#if'.
+// NOTE(review): __has_feature is used unguarded here -- confirm every
+// supported compiler provides it.
+#if __has_attribute(ext_vector_type) && __has_feature(ext_vector_type_boolean)
+#define LIBC_HAS_VECTOR_TYPE 1
+#else
+#define LIBC_HAS_VECTOR_TYPE 0
+#endif
+
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_ATTRIBUTES_H
diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h
index fde30eadfd83b..fc6099ca6ccc5 100644
--- a/libc/src/__support/macros/properties/cpu_features.h
+++ b/libc/src/__support/macros/properties/cpu_features.h
@@ -59,6 +59,10 @@
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
#endif // __ARM_FP
+#if defined(__ARM_NEON)
+#define LIBC_TARGET_CPU_HAS_ARM_NEON
+#endif
+
#if defined(__riscv_flen)
// https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc
#if defined(__riscv_zfhmin)
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index 809decfbe5f08..5c9f622d44397 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -20,6 +20,7 @@ add_header_library(
libc.hdr.stdint_proxy
libc.src.__support.CPP.bitset
libc.src.__support.CPP.type_traits
+ libc.src.__support.CPP.simd
libc.src.__support.common
${string_config_options}
)
diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h
new file mode 100644
index 0000000000000..a7cde9b6fb27b
--- /dev/null
+++ b/libc/src/string/memory_utils/generic/inline_strlen.h
@@ -0,0 +1,52 @@
+//===-- Strlen for generic SIMD types -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H
+
+#include "src/__support/CPP/simd.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Performs a variable right shift on a mask by viewing it as an unsigned
+// integer of the same size: bit_cast to that integer, shift right by 'shift'
+// bits, then bit_cast back to the mask type.
+LIBC_INLINE constexpr cpp::simd_mask<char> shift_mask(cpp::simd_mask<char> m,
+                                                      size_t shift) {
+  using mask_ty = cpp::simd_mask<char>;
+  using bits_ty = cpp::internal::get_as_integer_type_t<mask_ty>;
+  const bits_ty bits = cpp::bit_cast<bits_ty>(m);
+  const bits_ty shifted = static_cast<bits_ty>(bits >> shift);
+  return cpp::bit_cast<mask_ty>(shifted);
+}
+
+// Returns the number of characters before the first null byte in 'src',
+// scanning one whole vector per step.
+//
+// Every load is vector-aligned: the first one starts at 'src' rounded down to
+// the vector alignment, so it may read bytes located before the string, and
+// any load may read bytes located after the terminator. AddressSanitizer
+// instrumentation is turned off for this function, presumably because of those
+// deliberate out-of-bounds reads.
+[[clang::no_sanitize("address")]] LIBC_INLINE size_t
+string_length(const char *src) {
+  constexpr cpp::simd<char> null_byte = cpp::splat('\0');
+
+  size_t alignment = alignof(cpp::simd<char>);
+  const cpp::simd<char> *aligned = reinterpret_cast<const cpp::simd<char> *>(
+      __builtin_align_down(src, alignment));
+
+  cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(aligned);
+  cpp::simd_mask<char> mask = cpp::simd_cast<bool>(chars == null_byte);
+  // Shift out the lanes that lie before 'src' so that a null byte in front of
+  // the string is ignored and the remaining positions are relative to 'src'.
+  size_t offset = src - reinterpret_cast<const char *>(aligned);
+  cpp::simd_mask<char> head = shift_mask(mask, offset);
+  if (cpp::any_of(head))
+    return cpp::find_first_set(head);
+
+  for (;;) {
+    cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(++aligned);
+    cpp::simd_mask<char> mask = cpp::simd_cast<bool>(chars == null_byte);
+    // find_first_set() is only the position inside the current vector; the
+    // length is that position plus the distance from 'src' to the start of
+    // this vector.
+    if (cpp::any_of(mask))
+      return (reinterpret_cast<const char *>(aligned) - src) +
+             cpp::find_first_set(mask);
+  }
+}
+} // namespace internal
+
+namespace string_length_impl = internal;
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index ce461581b9d95..d31eb67785629 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -23,6 +23,9 @@
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
+#if LIBC_HAS_VECTOR_TYPE
+#include "src/string/memory_utils/generic/inline_strlen.h"
+#else
#if defined(LIBC_TARGET_ARCH_IS_X86)
#include "src/string/memory_utils/x86_64/inline_strlen.h"
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON)
@@ -30,7 +33,8 @@
#else
namespace string_length_impl = LIBC_NAMESPACE::wide_read;
#endif
-#endif
+#endif // LIBC_HAS_VECTOR_TYPE
+#endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
namespace LIBC_NAMESPACE_DECL {
namespace internal {
More information about the libc-commits
mailing list