[libc-commits] [libc] [libc] wcsstr implementation (PR #142440)
via libc-commits
libc-commits at lists.llvm.org
Mon Jun 2 13:15:54 PDT 2025
https://github.com/sribee8 updated https://github.com/llvm/llvm-project/pull/142440
>From f01229cd4b231165d641ecf0579b3db77f9f9cfc Mon Sep 17 00:00:00 2001
From: Sriya Pratipati <sriyap at google.com>
Date: Mon, 2 Jun 2025 17:29:28 +0000
Subject: [PATCH 1/2] [libc] wcsstr implementation
Implemented wcsstr and tests.
---
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/include/wchar.yaml | 7 ++
libc/src/wchar/CMakeLists.txt | 12 +++
libc/src/wchar/wcsstr.cpp | 39 ++++++++
libc/src/wchar/wcsstr.h | 21 +++++
libc/test/src/wchar/CMakeLists.txt | 10 ++
libc/test/src/wchar/wcsstr_test.cpp | 113 +++++++++++++++++++++++
7 files changed, 203 insertions(+)
create mode 100644 libc/src/wchar/wcsstr.cpp
create mode 100644 libc/src/wchar/wcsstr.h
create mode 100644 libc/test/src/wchar/wcsstr_test.cpp
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 545b9227349fe..d0ca7f2f7f39f 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -370,6 +370,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.wchar.wcsspn
libc.src.wchar.wmemcmp
libc.src.wchar.wmemcpy
+ libc.src.wchar.wcsstr
# sys/uio.h entrypoints
libc.src.sys.uio.writev
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index bfd9a10342019..d580de74cae41 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -72,3 +72,10 @@ functions:
- type: __restrict wchar_t *
- type: const __restrict wchar_t *
- type: size_t
+ - name: wcsstr
+ standards:
+ - stdc
+ return_type: const wchar_t *
+ arguments:
+ - type: const wchar_t *
+ - type: const wchar_t *
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 9db121762348b..cd5c34dce7acc 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -104,3 +104,15 @@ add_entrypoint_object(
libc.hdr.wchar_macros
libc.src.__support.wctype_utils
)
+
+add_entrypoint_object(
+ wcsstr
+ SRCS
+ wcsstr.cpp
+ HDRS
+ wcsstr.h
+ DEPENDS
+ libc.hdr.types.size_t
+ libc.hdr.wchar_macros
+ libc.src.string.string_utils
+)
diff --git a/libc/src/wchar/wcsstr.cpp b/libc/src/wchar/wcsstr.cpp
new file mode 100644
index 0000000000000..312352a3ad5a8
--- /dev/null
+++ b/libc/src/wchar/wcsstr.cpp
@@ -0,0 +1,39 @@
+//===-- Implementation of wcsstr ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsstr.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/string/string_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(const wchar_t *, wcsstr,
+ (const wchar_t *s1, const wchar_t *s2)) {
+ size_t s1_len = internal::string_length(s1);
+ size_t s2_len = internal::string_length(s2);
+ // An empty search string matches at the very start of s1, so return s1.
+ if (s2_len == 0)
+ return s1;
+ // If string to be found has length longer than s1, return nullptr.
+ if (s2_len > s1_len)
+ return nullptr;
+ for (size_t i = 0; i <= (s1_len - s2_len); ++i) {
+ size_t j = 0;
+ for (; j < s2_len && s1[i + j] == s2[j]; ++j)
+ ;
+ if (j == s2_len)
+ return (s1 + i);
+ }
+ return nullptr;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsstr.h b/libc/src/wchar/wcsstr.h
new file mode 100644
index 0000000000000..af054d8495a4b
--- /dev/null
+++ b/libc/src/wchar/wcsstr.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for wcsstr ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSSTR_H
+#define LLVM_LIBC_SRC_WCHAR_WCSSTR_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+const wchar_t *wcsstr(const wchar_t *s1, const wchar_t *s2);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSSTR_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 9bc230e0bddf3..100be8226fa61 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -94,3 +94,13 @@ add_libc_test(
DEPENDS
libc.src.wchar.wmemcpy
)
+
+add_libc_test(
+ wcsstr_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcsstr_test.cpp
+ DEPENDS
+ libc.src.wchar.wcsstr
+)
diff --git a/libc/test/src/wchar/wcsstr_test.cpp b/libc/test/src/wchar/wcsstr_test.cpp
new file mode 100644
index 0000000000000..c1448bbaf1abf
--- /dev/null
+++ b/libc/test/src/wchar/wcsstr_test.cpp
@@ -0,0 +1,113 @@
+//===-- Unittests for wcsstr ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcsstr.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWCSStrTest, NeedleNotInHaystack) {
+ // Should return nullptr if string is not found.
+ const wchar_t *haystack = L"12345";
+ const wchar_t *needle = L"a";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleIsEmptyString) {
+ // Should return pointer to first character if needle is empty.
+ const wchar_t *haystack = L"12345";
+ const wchar_t *needle = L"";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackIsEmptyString) {
+ // Should return nullptr since haystack is empty.
+ const wchar_t *needle = L"12345";
+ const wchar_t *haystack = L"";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackAndNeedleAreEmptyStrings) {
+ // Should point to haystack since needle is empty.
+ const wchar_t *needle = L"";
+ const wchar_t *haystack = L"";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackAndNeedleAreSingleCharacters) {
+ const wchar_t *haystack = L"a";
+ // Should point to haystack.
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"a"), haystack);
+ // Should return nullptr.
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"b"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleEqualToHaystack) {
+ const wchar_t *haystack = L"12345";
+ // Should point to haystack.
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"12345"), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleLargerThanHaystack) {
+ const wchar_t *haystack = L"123";
+ // Should return nullptr.
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"12345"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleAtBeginning) {
+ const wchar_t *haystack = L"12345";
+ const wchar_t *needle = L"12";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleInMiddle) {
+ const wchar_t *haystack = L"abcdefghi";
+ const wchar_t *needle = L"def";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 3);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleDirectlyBeforeNullTerminator) {
+ const wchar_t *haystack = L"abcdefghi";
+ const wchar_t *needle = L"ghi";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 6);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedlePastNullTerminator) {
+ const wchar_t haystack[5] = {L'1', L'2', L'\0', L'3', L'4'};
+ // Shouldn't find anything after the null terminator.
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, /*needle=*/L"3"), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, /*needle=*/L"4"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, PartialNeedle) {
+ const wchar_t *haystack = L"la_ap_lap";
+ const wchar_t *needle = L"lap";
+ // Shouldn't find la or ap.
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 6);
+}
+
+TEST(LlvmLibcWCSStrTest, MisspelledNeedle) {
+ const wchar_t *haystack = L"atalloftwocities...wait, tale";
+ const wchar_t *needle = L"tale";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 25);
+}
+
+TEST(LlvmLibcWCSStrTest, AnagramNeedle) {
+ const wchar_t *haystack = L"dgo_ogd_god_odg_gdo_dog";
+ const wchar_t *needle = L"dog";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 20);
+}
+
+TEST(LlvmLibcWCSStrTest, MorphedNeedle) {
+ // Changing any single letter of a matching needle must make the search fail.
+ const wchar_t *haystack = L"once upon a time";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"time"), haystack + 12);
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"lime"), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"tome"), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"tire"), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"timo"), nullptr);
+}
>From 85f23172eb5341e97e3fced927fe26374b8557be Mon Sep 17 00:00:00 2001
From: Sriya Pratipati <sriyap at google.com>
Date: Mon, 2 Jun 2025 20:15:43 +0000
Subject: [PATCH 2/2] Added comments for clarity
---
libc/src/wchar/wcsstr.cpp | 3 +++
1 file changed, 3 insertions(+)
diff --git a/libc/src/wchar/wcsstr.cpp b/libc/src/wchar/wcsstr.cpp
index 312352a3ad5a8..ef91ba1e14eb4 100644
--- a/libc/src/wchar/wcsstr.cpp
+++ b/libc/src/wchar/wcsstr.cpp
@@ -26,10 +26,13 @@ LLVM_LIBC_FUNCTION(const wchar_t *, wcsstr,
// If string to be found has length longer than s1, return nullptr.
if (s2_len > s1_len)
return nullptr;
+ // Try each possible starting position i in s1 for a match.
for (size_t i = 0; i <= (s1_len - s2_len); ++i) {
size_t j = 0;
+ // Advance j until the characters differ or the end of s2 is reached.
for (; j < s2_len && s1[i + j] == s2[j]; ++j)
;
+ // If j reached the end of s2, all of s2 matched starting at s1 + i.
if (j == s2_len)
return (s1 + i);
}
More information about the libc-commits
mailing list