[libc-commits] [libc] [libc] wcsspn implementation (PR #142034)
via libc-commits
libc-commits at lists.llvm.org
Thu May 29 13:55:43 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
Author: None (sribee8)
<details>
<summary>Changes</summary>
Implemented wcsspn as well as tests for the function.
---
Full diff: https://github.com/llvm/llvm-project/pull/142034.diff
7 Files Affected:
- (modified) libc/config/linux/x86_64/entrypoints.txt (+1)
- (modified) libc/include/wchar.yaml (+7)
- (modified) libc/src/wchar/CMakeLists.txt (+12)
- (added) libc/src/wchar/wcsspn.cpp (+35)
- (added) libc/src/wchar/wcsspn.h (+22)
- (modified) libc/test/src/wchar/CMakeLists.txt (+10)
- (added) libc/test/src/wchar/wcsspn_test.cpp (+86)
``````````diff
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index c1ba26008ca9c..2a35988ed24f4 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -366,6 +366,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.wchar.wctob
libc.src.wchar.wmemset
libc.src.wchar.wcschr
+ libc.src.wchar.wcsspn
# sys/uio.h entrypoints
libc.src.sys.uio.writev
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 0342c726146b3..94b9ed89c1e45 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -42,3 +42,10 @@ functions:
arguments:
- type: const wchar_t *
- type: wchar_t
+ - name: wcsspn
+ standards:
+ - stdc
+ return_type: size_t
+ arguments:
+ - type: const wchar_t *
+ - type: const wchar_t *
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index caec1a3f3266f..dcafd36978e49 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -56,3 +56,15 @@ add_entrypoint_object(
libc.hdr.wchar_macros
libc.src.__support.wctype_utils
)
+
+add_entrypoint_object(
+ wcsspn
+ SRCS
+ wcsspn.cpp
+ HDRS
+ wcsspn.h
+ DEPENDS
+ libc.hdr.wchar_macros
+ libc.hdr.types.size_t
+ libc.src.__support.wctype_utils
+)
diff --git a/libc/src/wchar/wcsspn.cpp b/libc/src/wchar/wcsspn.cpp
new file mode 100644
index 0000000000000..8bbb7096c0344
--- /dev/null
+++ b/libc/src/wchar/wcsspn.cpp
@@ -0,0 +1,35 @@
+//===-- Implementation of wcsspn ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsspn.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/CPP/bitset.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(size_t, wcsspn, (const wchar_t *s1, const wchar_t *s2)) {
+  // Length of the leading run of s1 made up only of characters found in s2.
+  size_t i = 0;
+  for (; s1[i]; ++i) {
+    bool in_s2 = false;
+    // size_t index: an int could overflow on a very long s2.
+    for (size_t n = 0; s2[n] && !in_s2; ++n)
+      in_s2 = (s1[i] == s2[n]);
+    // The first character of s1 that is absent from s2 ends the span.
+    if (!in_s2)
+      return i;
+  }
+  // Every character of s1 was found in s2.
+  return i;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsspn.h b/libc/src/wchar/wcsspn.h
new file mode 100644
index 0000000000000..6dbe65d9a4a98
--- /dev/null
+++ b/libc/src/wchar/wcsspn.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for wcsspn ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSSPN_H
+#define LLVM_LIBC_SRC_WCHAR_WCSSPN_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t wcsspn(const wchar_t *s1, const wchar_t *s2);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSSPN_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 4c75f070cf400..2b99a94d14a7e 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -54,3 +54,13 @@ add_libc_test(
DEPENDS
libc.src.wchar.wcschr
)
+
+add_libc_test(
+ wcsspn_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcsspn_test.cpp
+ DEPENDS
+ libc.src.wchar.wcsspn
+)
diff --git a/libc/test/src/wchar/wcsspn_test.cpp b/libc/test/src/wchar/wcsspn_test.cpp
new file mode 100644
index 0000000000000..fbcc35f2f63c3
--- /dev/null
+++ b/libc/test/src/wchar/wcsspn_test.cpp
@@ -0,0 +1,86 @@
+//===-- Unittests for wcsspn ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcsspn.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWCSSpnTest, EmptyStringShouldReturnZeroLengthSpan) {
+ // The search should not include the null terminator.
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"", L""), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"_", L""), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"", L"_"), size_t{0});
+}
+
+TEST(LlvmLibcWCSSpnTest, ShouldNotSpanAnythingAfterNullTerminator) {
+ const wchar_t src[4] = {'a', 'b', '\0', 'c'};
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"ab"), size_t{2});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"c"), size_t{0});
+
+ // Same goes for the segment to be searched for.
+ const wchar_t segment[4] = {'1', '2', '\0', '3'};
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"123", segment), size_t{2});
+}
+
+TEST(LlvmLibcWCSSpnTest, SpanEachIndividualCharacter) {
+ const wchar_t *src = L"12345";
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"1"), size_t{1});
+ // Since '1' is not within the segment, the span
+ // size should remain zero.
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"2"), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"3"), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"4"), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"5"), size_t{0});
+}
+
+TEST(LlvmLibcWCSSpnTest, UnmatchedCharacterShouldNotBeCountedInSpan) {
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"a", L"b"), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"abcdef", L"1"), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"123", L"4"), size_t{0});
+}
+
+TEST(LlvmLibcWCSSpnTest, SequentialCharactersShouldSpan) {
+ const wchar_t *src = L"abcde";
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"a"), size_t{1});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"ab"), size_t{2});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"abc"), size_t{3});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"abcd"), size_t{4});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"abcde"), size_t{5});
+ // Same thing for when the roles are reversed.
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"abcde", src), size_t{5});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"abcd", src), size_t{4});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"abc", src), size_t{3});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"ab", src), size_t{2});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"a", src), size_t{1});
+}
+
+TEST(LlvmLibcWCSSpnTest, NonSequentialCharactersShouldNotSpan) {
+ const wchar_t *src = L"123456789";
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"_1_abc_2_def_3_"), size_t{3});
+ // Only spans "1234": '5' is not in the segment, so "67" is never reached.
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(src, L"67__34abc12"), size_t{4});
+}
+
+TEST(LlvmLibcWCSSpnTest, ReverseCharacters) {
+ // The order of characters in the segment is irrelevant, so all five span.
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"12345", L"54321"), size_t{5});
+ // Does not span any since '1' is not within the segment.
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"12345", L"432"), size_t{0});
+ // Only spans 1 since '2' is not within the segment.
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"12345", L"51"), size_t{1});
+}
+
+TEST(LlvmLibcWCSSpnTest, DuplicatedCharactersToBeSearchedForShouldStillMatch) {
+ // Only a single character, so only spans 1.
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"a", L"aa"), size_t{1});
+ // This should count once for each 'a' in the source string.
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"aa", L"aa"), size_t{2});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"aaa", L"aa"), size_t{3});
+ EXPECT_EQ(LIBC_NAMESPACE::wcsspn(L"aaaa", L"aa"), size_t{4});
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/142034
More information about the libc-commits
mailing list