[libc-commits] [libc] [libc] add an SVE implementation of strlen (PR #167259)

Schrodinger ZHU Yifan via libc-commits libc-commits at lists.llvm.org
Sun Nov 9 17:17:31 PST 2025


https://github.com/SchrodingerZhu updated https://github.com/llvm/llvm-project/pull/167259

>From 2d40ffb0d37f0bc15ae967ccd1830de518058248 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sun, 9 Nov 2025 20:09:04 -0500
Subject: [PATCH 1/2] [libc] add an SVE implementation of strlen

---
 libc/fuzzing/__support/freelist_heap_fuzz.cpp |  2 +-
 libc/fuzzing/string/CMakeLists.txt            |  8 +++
 libc/fuzzing/string/strlen_fuzz.cpp           | 32 ++++++++++
 .../memory_utils/aarch64/inline_strlen.h      | 63 +++++++++++++++++--
 libc/src/string/string_utils.h                | 14 ++++-
 libc/test/src/string/strlen_test.cpp          | 12 ++++
 6 files changed, 122 insertions(+), 9 deletions(-)
 create mode 100644 libc/fuzzing/string/strlen_fuzz.cpp

diff --git a/libc/fuzzing/__support/freelist_heap_fuzz.cpp b/libc/fuzzing/__support/freelist_heap_fuzz.cpp
index 7b7985a83c3e6..0b400cb156491 100644
--- a/libc/fuzzing/__support/freelist_heap_fuzz.cpp
+++ b/libc/fuzzing/__support/freelist_heap_fuzz.cpp
@@ -24,7 +24,7 @@ asm(R"(
 _end:
   .fill 1024
 __llvm_libc_heap_limit:
-)";
+)");
 
 using LIBC_NAMESPACE::FreeListHeap;
 using LIBC_NAMESPACE::inline_memset;
diff --git a/libc/fuzzing/string/CMakeLists.txt b/libc/fuzzing/string/CMakeLists.txt
index efda80b59c951..0918e92552ea7 100644
--- a/libc/fuzzing/string/CMakeLists.txt
+++ b/libc/fuzzing/string/CMakeLists.txt
@@ -40,3 +40,11 @@ add_libc_fuzzer(
   DEPENDS
     libc.src.strings.bcmp
 )
+
+add_libc_fuzzer(
+  strlen_fuzz
+  SRCS
+    strlen_fuzz.cpp
+  DEPENDS
+    libc.src.string.strlen
+)
diff --git a/libc/fuzzing/string/strlen_fuzz.cpp b/libc/fuzzing/string/strlen_fuzz.cpp
new file mode 100644
index 0000000000000..dd72c19b7fdc7
--- /dev/null
+++ b/libc/fuzzing/string/strlen_fuzz.cpp
@@ -0,0 +1,32 @@
+//===-- strlen_fuzz.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Fuzzing test for llvm-libc strlen implementation.
+///
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strlen.h"
+#include <cstdint>
+#include <cstring>
+
+// Delegate to LLVMFuzzerMutate, then force the result to be NUL-terminated.
+extern "C" size_t LLVMFuzzerMutate(uint8_t *data, size_t size, size_t max_size);
+extern "C" size_t LLVMFuzzerCustomMutator(uint8_t *data, size_t size,
+                                          size_t max_size, unsigned int seed) {
+  const size_t mutated_size = LLVMFuzzerMutate(data, size, max_size);
+  data[mutated_size - 1] = '\0'; // the last byte always becomes the terminator
+  return mutated_size;
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  const char *str = reinterpret_cast<const char *>(data);
+  bool is_c_string = size != 0 && data[size - 1] == '\0'; // else OOB read
+  if (is_c_string && ::strlen(str) != LIBC_NAMESPACE::strlen(str))
+    __builtin_trap();
+  return 0;
+}
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
index 87f5ccdd56e23..5d6dfec7e91e5 100644
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -8,14 +8,13 @@
 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
 
+#include "src/__support/macros/properties/cpu_features.h"
+
 #if defined(__ARM_NEON)
 #include "src/__support/CPP/bit.h" // countr_zero
-
 #include <arm_neon.h>
 #include <stddef.h> // size_t
-
 namespace LIBC_NAMESPACE_DECL {
-
 namespace neon {
 [[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t
 string_length(const char *src) {
@@ -45,9 +44,63 @@ string_length(const char *src) {
   }
 }
 } // namespace neon
+} // namespace LIBC_NAMESPACE_DECL
+#endif // __ARM_NEON
 
-namespace string_length_impl = neon;
+#ifdef LIBC_TARGET_CPU_HAS_SVE
+#include "src/__support/macros/optimization.h"
+#include <arm_sve.h>
+namespace LIBC_NAMESPACE_DECL {
+namespace sve {
+[[maybe_unused]] LIBC_INLINE static size_t string_length(const char *src) {
+  const uint8_t *ptr = reinterpret_cast<const uint8_t *>(src);
+  // Initialize the first-fault register to all true
+  svsetffr();
+  const svbool_t all_true = svptrue_b8(); // all true predicate
+  svbool_t cmp_zero;
+  size_t len = 0;
 
+  for (;;) {
+    // Read a vector's worth of bytes at offset len, stopping on first fault.
+    svuint8_t data = svldff1_u8(all_true, &ptr[len]);
+    svbool_t fault_mask = svrdffr_z(all_true);
+    bool has_no_fault = svptest_last(all_true, fault_mask);
+    if (LIBC_LIKELY(has_no_fault)) {
+      // First fault did not fail: the whole vector is valid.
+      // Avoid depending on the contents of FFR beyond the branch.
+      len += svcntb(); // speculative increment
+      cmp_zero = svcmpeq_n_u8(all_true, data, 0);
+      bool has_no_zero = !svptest_any(all_true, cmp_zero);
+      if (LIBC_LIKELY(has_no_zero))
+        continue;
+      len -= svcntb(); // undo speculative increment
+      break;
+    } else {
+      // First fault failed: only some of the vector is valid.
+      // Perform the comparison only on the valid bytes.
+      cmp_zero = svcmpeq_n_u8(fault_mask, data, 0);
+      bool has_zero = svptest_any(fault_mask, cmp_zero);
+      if (LIBC_LIKELY(has_zero))
+        break;
+      svsetffr();
+      len += svcntp_b8(all_true, fault_mask);
+      continue;
+    }
+  }
+  // Select the bytes before the first zero and count them.
+  svbool_t before_zero = svbrkb_z(all_true, cmp_zero);
+  len += svcntp_b8(all_true, before_zero);
+  return len;
+}
+} // namespace sve
+} // namespace LIBC_NAMESPACE_DECL
+#endif // LIBC_TARGET_CPU_HAS_SVE
+
+namespace LIBC_NAMESPACE_DECL {
+#ifdef LIBC_TARGET_CPU_HAS_SVE
+namespace string_length_impl = sve;
+#elif defined(__ARM_NEON)
+namespace string_length_impl = neon;
+#endif
 } // namespace LIBC_NAMESPACE_DECL
-#endif // __ARM_NEON
 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index cbce62ead0328..c4984883addb7 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -22,9 +22,17 @@
 #include "src/__support/macros/attributes.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#include "src/__support/macros/properties/cpu_features.h"
 #include "src/string/memory_utils/inline_memcpy.h"
 
-#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
+#if !defined(LIBC_TARGET_CPU_HAS_SVE)
+#error "SVE is not supported on this CPU"
+#endif
+
+// SVE implementation has fault safety
+#if defined(LIBC_TARGET_CPU_HAS_SVE)
+#include "src/string/memory_utils/aarch64/inline_strlen.h"
+#elif defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
 #if LIBC_HAS_VECTOR_TYPE
 #include "src/string/memory_utils/generic/inline_strlen.h"
 #elif defined(LIBC_TARGET_ARCH_IS_X86)
@@ -33,8 +41,8 @@
 #include "src/string/memory_utils/aarch64/inline_strlen.h"
 #else
 namespace string_length_impl = LIBC_NAMESPACE::wide_read;
-#endif
-#endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
+#endif // LIBC_HAS_VECTOR_TYPE
+#endif // defined(LIBC_TARGET_CPU_HAS_SVE)
 
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
diff --git a/libc/test/src/string/strlen_test.cpp b/libc/test/src/string/strlen_test.cpp
index 4eb9d47e9209d..784dd7b194b3f 100644
--- a/libc/test/src/string/strlen_test.cpp
+++ b/libc/test/src/string/strlen_test.cpp
@@ -22,3 +22,15 @@ TEST(LlvmLibcStrLenTest, AnyString) {
   size_t result = LIBC_NAMESPACE::strlen(any);
   ASSERT_EQ((size_t)12, result);
 }
+
+TEST(LlvmLibcStrLenTest, DataAfterNulString) {
+  // Bytes stored after the first NUL must not contribute to the length.
+  constexpr char TEXT[10] = {'a', 'b', 'c', 'd', 'e', 'f', 0, 'h', 'i', 'j'};
+  ASSERT_EQ(static_cast<size_t>(6), LIBC_NAMESPACE::strlen(TEXT));
+}
+
+TEST(LlvmLibcStrLenTest, MultipleNulsInOneWord) {
+  // With several NUL bytes close together, only the first one ends the string.
+  constexpr char TEXT[10] = {'a', 'b', 0, 'd', 'e', 'f', 0, 'h', 'i', 'j'};
+  ASSERT_EQ(static_cast<size_t>(2), LIBC_NAMESPACE::strlen(TEXT));
+}

>From 90aae3f6f3f8c0abba3ea058d236ae066f5effed Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sun, 9 Nov 2025 20:17:19 -0500
Subject: [PATCH 2/2] [libc] remove my debug macro

---
 libc/src/string/string_utils.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index c4984883addb7..bd45bddf0b2a8 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -25,10 +25,6 @@
 #include "src/__support/macros/properties/cpu_features.h"
 #include "src/string/memory_utils/inline_memcpy.h"
 
-#if !defined(LIBC_TARGET_CPU_HAS_SVE)
-#error "SVE is not supported on this CPU"
-#endif
-
 // SVE implementation has fault safety
 #if defined(LIBC_TARGET_CPU_HAS_SVE)
 #include "src/string/memory_utils/aarch64/inline_strlen.h"



More information about the libc-commits mailing list