[libc-commits] [libc] [libc] wcsspn implementation (PR #142034)

via libc-commits libc-commits at lists.llvm.org
Fri May 30 10:39:13 PDT 2025


https://github.com/sribee8 updated https://github.com/llvm/llvm-project/pull/142034

>From b63141a1a25d19d57df898312d8bf4a29625a73a Mon Sep 17 00:00:00 2001
From: Sriya Pratipati <sriyap at google.com>
Date: Thu, 29 May 2025 20:53:31 +0000
Subject: [PATCH 1/2] [libc] wcsspn implementation

Implemented wcsspn as well as tests for the function.
---
 libc/config/linux/x86_64/entrypoints.txt |  1 +
 libc/include/wchar.yaml                  |  7 ++
 libc/src/wchar/CMakeLists.txt            | 12 ++++
 libc/src/wchar/wcsspn.cpp                | 35 ++++++++++
 libc/src/wchar/wcsspn.h                  | 22 ++++++
 libc/test/src/wchar/CMakeLists.txt       | 10 +++
 libc/test/src/wchar/wcsspn_test.cpp      | 86 ++++++++++++++++++++++++
 7 files changed, 173 insertions(+)
 create mode 100644 libc/src/wchar/wcsspn.cpp
 create mode 100644 libc/src/wchar/wcsspn.h
 create mode 100644 libc/test/src/wchar/wcsspn_test.cpp

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index c1ba26008ca9c..2a35988ed24f4 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -366,6 +366,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.wchar.wctob
     libc.src.wchar.wmemset
     libc.src.wchar.wcschr
+    libc.src.wchar.wcsspn
 
     # sys/uio.h entrypoints
     libc.src.sys.uio.writev
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 0342c726146b3..94b9ed89c1e45 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -42,3 +42,10 @@ functions:
     arguments: 
       - type: const wchar_t *
       - type: wchar_t
+  - name: wcsspn
+    standards:
+      - stdc
+    return_type: size_t
+    arguments: 
+      - type: const wchar_t *
+      - type: const wchar_t *
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index caec1a3f3266f..dcafd36978e49 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -56,3 +56,15 @@ add_entrypoint_object(
     libc.hdr.wchar_macros
     libc.src.__support.wctype_utils
 )
+
+add_entrypoint_object(
+  wcsspn
+  SRCS
+    wcsspn.cpp
+  HDRS
+    wcsspn.h
+  DEPENDS
+    libc.hdr.wchar_macros
+    libc.hdr.types.size_t
+    libc.src.__support.wctype_utils
+)
diff --git a/libc/src/wchar/wcsspn.cpp b/libc/src/wchar/wcsspn.cpp
new file mode 100644
index 0000000000000..8bbb7096c0344
--- /dev/null
+++ b/libc/src/wchar/wcsspn.cpp
@@ -0,0 +1,35 @@
+//===-- Implementation of wcsspn ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsspn.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/CPP/bitset.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(size_t, wcsspn, (const wchar_t *s1, const wchar_t *s2)) {
+  size_t i = 0;
+  int in_s2 = 0;
+  for (; s1[i]; ++i) {
+    for (int n = 0; s2[n] && in_s2 == 0; ++n) {
+      if (s1[i] == s2[n])
+        in_s2 = 1;
+    }
+    if (in_s2 == 0) {
+      return i;
+    }
+    in_s2 = 0;
+  }
+  return i;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsspn.h b/libc/src/wchar/wcsspn.h
new file mode 100644
index 0000000000000..6dbe65d9a4a98
--- /dev/null
+++ b/libc/src/wchar/wcsspn.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for wcsspn ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSSPN_H
+#define LLVM_LIBC_SRC_WCHAR_WCSSPN_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t wcsspn(const wchar_t *s1, const wchar_t *s2);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSSPN_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 4c75f070cf400..2b99a94d14a7e 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -54,3 +54,13 @@ add_libc_test(
   DEPENDS
     libc.src.wchar.wcschr
 )
+
+add_libc_test(
+  wcsspn_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcsspn_test.cpp
+  DEPENDS
+    libc.src.wchar.wcsspn
+)
diff --git a/libc/test/src/wchar/wcsspn_test.cpp b/libc/test/src/wchar/wcsspn_test.cpp
new file mode 100644
index 0000000000000..fbcc35f2f63c3
--- /dev/null
+++ b/libc/test/src/wchar/wcsspn_test.cpp
@@ -0,0 +1,86 @@
+//===-- Unittests for wcsspn ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcsspn.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWCSSpnTest, EmptyStringShouldReturnZeroLengthSpan) {
+  // The search should not include the null terminator.
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"", L""), size_t{0});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"_", L""), size_t{0});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"", L"_"), size_t{0});
+}
+
+TEST(LlvmLibcWCSSpnTest, ShouldNotSpanAnythingAfterNullTerminator) {
+  const wchar_t src[4] = {'a', 'b', '\0', 'c'};
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"ab"), size_t{2});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"c"), size_t{0});
+
+  // Same goes for the segment to be searched for.
+  const wchar_t segment[4] = {'1', '2', '\0', '3'};
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"123", segment), size_t{2});
+}
+
+TEST(LlvmLibcWCSSpnTest, SpanEachIndividualCharacter) {
+  const wchar_t *src = L"12345";
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"1"), size_t{1});
+  // Since '1' is not within the segment, the span
+  // size should remain zero.
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"2"), size_t{0});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"3"), size_t{0});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"4"), size_t{0});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"5"), size_t{0});
+}
+
+TEST(LlvmLibcWCSSpnTest, UnmatchedCharacterShouldNotBeCountedInSpan) {
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"a", L"b"), size_t{0});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"abcdef", L"1"), size_t{0});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"123", L"4"), size_t{0});
+}
+
+TEST(LlvmLibcWCSSpnTest, SequentialCharactersShouldSpan) {
+  const wchar_t *src = L"abcde";
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"a"), size_t{1});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"ab"), size_t{2});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"abc"), size_t{3});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"abcd"), size_t{4});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"abcde"), size_t{5});
+  // Same thing for when the roles are reversed.
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"abcde", src), size_t{5});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"abcd", src), size_t{4});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"abc", src), size_t{3});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"ab", src), size_t{2});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"a", src), size_t{1});
+}
+
+TEST(LlvmLibcWCSSpnTest, NonSequentialCharactersShouldNotSpan) {
+  const wchar_t *src = L"123456789";
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"_1_abc_2_def_3_"), size_t{3});
+  // Only spans 4 since '5' is not within the segment.
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"67__34abc12"), size_t{4});
+}
+
+TEST(LlvmLibcWCSSpnTest, ReverseCharacters) {
+  // Order within the segment is irrelevant, so this should span everything.
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"12345", L"54321"), size_t{5});
+  // Does not span any since '1' is not within the segment.
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"12345", L"432"), size_t{0});
+  // Only spans 1 since '2' is not within the segment.
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"12345", L"51"), size_t{1});
+}
+
+TEST(LlvmLibcWCSSpnTest, DuplicatedCharactersToBeSearchedForShouldStillMatch) {
+  // Only a single character, so only spans 1.
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"a", L"aa"), size_t{1});
+  // This should count once for each 'a' in the source string.
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"aa", L"aa"), size_t{2});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"aaa", L"aa"), size_t{3});
+  EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"aaaa", L"aa"), size_t{4});
+}

>From 040f3eb9ce4b4c38edd21c9010376cd5860de63f Mon Sep 17 00:00:00 2001
From: Sriya Pratipati <sriyap at google.com>
Date: Thu, 29 May 2025 23:05:42 +0000
Subject: [PATCH 2/2] Removed unused header file

---
 libc/src/wchar/wcsspn.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libc/src/wchar/wcsspn.cpp b/libc/src/wchar/wcsspn.cpp
index 8bbb7096c0344..2c59346858356 100644
--- a/libc/src/wchar/wcsspn.cpp
+++ b/libc/src/wchar/wcsspn.cpp
@@ -10,7 +10,6 @@
 
 #include "hdr/types/size_t.h"
 #include "hdr/types/wchar_t.h"
-#include "src/__support/CPP/bitset.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
 



More information about the libc-commits mailing list