[libc-commits] [libc] [llvm] [libc] wcscspn implementation (PR #146158)
via libc-commits
libc-commits at lists.llvm.org
Mon Jun 30 14:47:31 PDT 2025
https://github.com/sribee8 updated https://github.com/llvm/llvm-project/pull/146158
>From 4a610c0b65ba14354ac16ab92583b2c8847289dc Mon Sep 17 00:00:00 2001
From: Sriya Pratipati <sriyap at google.com>
Date: Fri, 27 Jun 2025 21:15:42 +0000
Subject: [PATCH 1/2] [libc] wcscspn implementation
Implemented wcscspn and tests.
---
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/include/wchar.yaml | 7 +++
libc/src/wchar/CMakeLists.txt | 11 ++++
libc/src/wchar/wcscspn.cpp | 34 ++++++++++++
libc/src/wchar/wcscspn.h | 22 ++++++++
libc/test/src/wchar/CMakeLists.txt | 10 ++++
libc/test/src/wchar/wcscspn_test.cpp | 68 ++++++++++++++++++++++++
7 files changed, 153 insertions(+)
create mode 100644 libc/src/wchar/wcscspn.cpp
create mode 100644 libc/src/wchar/wcscspn.h
create mode 100644 libc/test/src/wchar/wcscspn_test.cpp
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 7a954a480e698..5104a42e1789d 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -376,6 +376,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.wchar.wcspbrk
libc.src.wchar.wcsrchr
libc.src.wchar.wcsspn
+ libc.src.wchar.wcscspn
libc.src.wchar.wmemcmp
libc.src.wchar.wmempcpy
libc.src.wchar.wmemcpy
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 9e862ff984494..5bfb5944daca3 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -104,6 +104,13 @@ functions:
arguments:
- type: const wchar_t *
- type: const wchar_t *
+ - name: wcscspn
+ standards:
+ - stdc
+ return_type: size_t
+ arguments:
+ - type: const wchar_t *
+ - type: const wchar_t *
- name: wmemcmp
standards:
- stdc
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 867aab6755bf6..59e430f7546b1 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -203,6 +203,17 @@ add_entrypoint_object(
libc.hdr.types.size_t
)
+add_entrypoint_object(
+ wcscspn
+ SRCS
+ wcscspn.cpp
+ HDRS
+ wcscspn.h
+ DEPENDS
+ libc.hdr.wchar_macros
+ libc.hdr.types.size_t
+)
+
add_entrypoint_object(
wmemcmp
SRCS
diff --git a/libc/src/wchar/wcscspn.cpp b/libc/src/wchar/wcscspn.cpp
new file mode 100644
index 0000000000000..8869d84cdfdee
--- /dev/null
+++ b/libc/src/wchar/wcscspn.cpp
@@ -0,0 +1,34 @@
+//===-- Implementation of wcscspn -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcscspn.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Returns true when `c` does not occur in the null-terminated set `s2`, and
+// false as soon as a matching wide character is found.
+// Declared static so this generically named helper has internal linkage and
+// cannot collide with another `check` defined in the library namespace.
+static bool check(wchar_t c, const wchar_t *s2) {
+  // Index with size_t, matching the caller; an int counter would overflow on
+  // sets longer than INT_MAX.
+  for (size_t n = 0; s2[n]; ++n) {
+    if (s2[n] == c)
+      return false;
+  }
+  return true;
+}
+
+// Returns the number of leading wide characters of `s1` that do not appear in
+// `s2`. If no character of `s1` appears in `s2`, the result is the length of
+// `s1` (the scan stops at its null terminator).
+LLVM_LIBC_FUNCTION(size_t, wcscspn, (const wchar_t *s1, const wchar_t *s2)) {
+  size_t i = 0;
+  for (; s1[i]; ++i) {
+    // Stop at the first character that belongs to the reject set `s2`.
+    if (!check(s1[i], s2))
+      return i;
+  }
+  return i;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcscspn.h b/libc/src/wchar/wcscspn.h
new file mode 100644
index 0000000000000..cffc2b98c3467
--- /dev/null
+++ b/libc/src/wchar/wcscspn.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for wcscspn ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSCSPN_H
+#define LLVM_LIBC_SRC_WCHAR_WCSCSPN_H
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t wcscspn(const wchar_t *s1, const wchar_t *s2);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSCSPN_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 02949c68d81dd..c87c60c8d93db 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -173,6 +173,16 @@ add_libc_test(
libc.src.wchar.wcsspn
)
+add_libc_test(
+ wcscspn_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcscspn_test.cpp
+ DEPENDS
+ libc.src.wchar.wcscspn
+)
+
add_libc_test(
wmemchr_test
SUITE
diff --git a/libc/test/src/wchar/wcscspn_test.cpp b/libc/test/src/wchar/wcscspn_test.cpp
new file mode 100644
index 0000000000000..cb70decda2834
--- /dev/null
+++ b/libc/test/src/wchar/wcscspn_test.cpp
@@ -0,0 +1,68 @@
+//===-- Unittests for wcscspn
+//----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcscspn.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWCSCSpnTest, EmptyStringShouldReturnZeroLengthSpan) {
+ // The search should not include the null terminator.
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"", L""), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"_", L""), size_t{1});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"", L"_"), size_t{0});
+}
+
+TEST(LlvmLibcWCSCSpnTest, ShouldNotSpanAnythingAfterNullTerminator) {
+ const wchar_t src[4] = {L'a', L'b', L'\0', L'c'};
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"de"), size_t{2});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"c"), size_t{2});
+
+ // Same goes for the segment to be searched for.
+ const wchar_t segment[4] = {L'1', L'2', L'\0', L'3'};
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"3", segment), size_t{1});
+}
+
+TEST(LlvmLibcWCSCSpnTest, SpanEachIndividualCharacter) {
+ const wchar_t *src = L"12345";
+ // These are all in the segment.
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"1"), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"2"), size_t{1});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"3"), size_t{2});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"4"), size_t{3});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"5"), size_t{4});
+}
+
+TEST(LlvmLibcWCSCSpnTest, UnmatchedCharacterShouldReturnLength) {
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"a", L"b"), size_t{1});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"abcdef", L"1"), size_t{6});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"123", L"4"), size_t{3});
+}
+
+TEST(LlvmLibcWCSCSpnTest, NonSequentialCharactersShouldNotSpan) {
+ const wchar_t *src = L"abc456789";
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"_1_abc_2_def_3_"), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"67__34xyz12"), size_t{3});
+}
+
+TEST(LlvmLibcWCSCSpnTest, ReverseCharacters) {
+ // These are all in the string.
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"12345", L"54321"), size_t{0});
+ // 1 is not in the span.
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"12345", L"432"), size_t{1});
+ // 1 is in the span.
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"12345", L"51"), size_t{0});
+}
+
+TEST(LlvmLibcWCSCSpnTest, DuplicatedCharactersToBeSearchedForShouldStillMatch) {
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"a", L"aa"), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"aa", L"aa"), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"aaa", L"bb"), size_t{3});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"aaaa", L"bb"), size_t{4});
+}
>From baea2f5ccbda001a5c9f99a4ad7c30ae9df8aad4 Mon Sep 17 00:00:00 2001
From: Sriya Pratipati <sriyap at google.com>
Date: Mon, 30 Jun 2025 21:47:16 +0000
Subject: [PATCH 2/2] added wcscspn to bazel and fixed size_t initialization
---
libc/test/src/wchar/wcscspn_test.cpp | 49 +++++++++----------
.../llvm-project-overlay/libc/BUILD.bazel | 12 +++++
.../libc/test/src/wchar/BUILD.bazel | 10 ++++
3 files changed, 46 insertions(+), 25 deletions(-)
diff --git a/libc/test/src/wchar/wcscspn_test.cpp b/libc/test/src/wchar/wcscspn_test.cpp
index cb70decda2834..6318302d10057 100644
--- a/libc/test/src/wchar/wcscspn_test.cpp
+++ b/libc/test/src/wchar/wcscspn_test.cpp
@@ -1,5 +1,4 @@
-//===-- Unittests for wcscspn
-//----------------------------------------------===//
+//===-- Unittests for wcscspn ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,55 +13,55 @@
TEST(LlvmLibcWCSCSpnTest, EmptyStringShouldReturnZeroLengthSpan) {
// The search should not include the null terminator.
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"", L""), size_t{0});
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"_", L""), size_t{1});
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"", L"_"), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"", L""), size_t(0));
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"_", L""), size_t(1));
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"", L"_"), size_t(0));
}
TEST(LlvmLibcWCSCSpnTest, ShouldNotSpanAnythingAfterNullTerminator) {
const wchar_t src[4] = {L'a', L'b', L'\0', L'c'};
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"de"), size_t{2});
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"c"), size_t{2});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"de"), size_t(2));
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"c"), size_t(2));
// Same goes for the segment to be searched for.
const wchar_t segment[4] = {L'1', L'2', L'\0', L'3'};
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"3", segment), size_t{1});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"3", segment), size_t(1));
}
TEST(LlvmLibcWCSCSpnTest, SpanEachIndividualCharacter) {
const wchar_t *src = L"12345";
// These are all in the segment.
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"1"), size_t{0});
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"2"), size_t{1});
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"3"), size_t{2});
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"4"), size_t{3});
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"5"), size_t{4});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"1"), size_t(0));
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"2"), size_t(1));
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"3"), size_t(2));
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"4"), size_t(3));
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"5"), size_t(4));
}
TEST(LlvmLibcWCSCSpnTest, UnmatchedCharacterShouldReturnLength) {
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"a", L"b"), size_t{1});
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"abcdef", L"1"), size_t{6});
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"123", L"4"), size_t{3});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"a", L"b"), size_t(1));
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"abcdef", L"1"), size_t(6));
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"123", L"4"), size_t(3));
}
TEST(LlvmLibcWCSCSpnTest, NonSequentialCharactersShouldNotSpan) {
const wchar_t *src = L"abc456789";
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"_1_abc_2_def_3_"), size_t{0});
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"67__34xyz12"), size_t{3});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"_1_abc_2_def_3_"), size_t(0));
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(src, L"67__34xyz12"), size_t(3));
}
TEST(LlvmLibcWCSCSpnTest, ReverseCharacters) {
// These are all in the string.
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"12345", L"54321"), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"12345", L"54321"), size_t(0));
// 1 is not in the span.
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"12345", L"432"), size_t{1});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"12345", L"432"), size_t(1));
// 1 is in the span.
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"12345", L"51"), size_t{0});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"12345", L"51"), size_t(0));
}
TEST(LlvmLibcWCSCSpnTest, DuplicatedCharactersToBeSearchedForShouldStillMatch) {
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"a", L"aa"), size_t{0});
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"aa", L"aa"), size_t{0});
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"aaa", L"bb"), size_t{3});
- EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"aaaa", L"bb"), size_t{4});
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"a", L"aa"), size_t(0));
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"aa", L"aa"), size_t(0));
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"aaa", L"bb"), size_t(3));
+ EXPECT_EQ(LIBC_NAMESPACE::wcscspn(L"aaaa", L"bb"), size_t(4));
}
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index b4d1bcdb03f48..2484a2f1e2bd7 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -5815,6 +5815,18 @@ libc_function(
],
)
+libc_function(
+ name = "wcscspn",
+ srcs = ["src/wchar/wcscspn.cpp"],
+ hdrs = ["src/wchar/wcscspn.h"],
+ deps = [
+ ":__support_common",
+ ":__support_macros_config",
+ ":types_size_t",
+ ":types_wchar_t",
+ ],
+)
+
libc_function(
name = "wcslen",
srcs = ["src/wchar/wcslen.cpp"],
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/wchar/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/wchar/BUILD.bazel
index 25243ae7c2b45..f94065237ef4f 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/wchar/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/wchar/BUILD.bazel
@@ -73,6 +73,16 @@ libc_test(
],
)
+libc_test(
+ name = "wcscspn_test",
+ srcs = ["wcscspn_test.cpp"],
+ deps = [
+ "//libc:types_size_t",
+ "//libc:types_wchar_t",
+ "//libc:wcscspn",
+ ],
+)
+
libc_test(
name = "wcslen_test",
srcs = ["wcslen_test.cpp"],
More information about the libc-commits
mailing list