[libc-commits] [libc] [libc] wcsstr implementation (PR #142440)
via libc-commits
libc-commits at lists.llvm.org
Mon Jun 2 10:30:38 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
Author: None (sribee8)
<details>
<summary>Changes</summary>
Implemented `wcsstr` and added unit tests for it.
---
Full diff: https://github.com/llvm/llvm-project/pull/142440.diff
7 Files Affected:
- (modified) libc/config/linux/x86_64/entrypoints.txt (+1)
- (modified) libc/include/wchar.yaml (+7)
- (modified) libc/src/wchar/CMakeLists.txt (+12)
- (added) libc/src/wchar/wcsstr.cpp (+39)
- (added) libc/src/wchar/wcsstr.h (+21)
- (modified) libc/test/src/wchar/CMakeLists.txt (+10)
- (added) libc/test/src/wchar/wcsstr_test.cpp (+113)
``````````diff
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 545b9227349fe..d0ca7f2f7f39f 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -370,6 +370,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.wchar.wcsspn
libc.src.wchar.wmemcmp
libc.src.wchar.wmemcpy
+ libc.src.wchar.wcsstr
# sys/uio.h entrypoints
libc.src.sys.uio.writev
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index bfd9a10342019..d580de74cae41 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -72,3 +72,10 @@ functions:
- type: __restrict wchar_t *
- type: const __restrict wchar_t *
- type: size_t
+ - name: wcsstr
+ standards:
+ - stdc
+ return_type: const wchar_t *
+ arguments:
+ - type: const wchar_t *
+ - type: const wchar_t *
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 9db121762348b..cd5c34dce7acc 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -104,3 +104,15 @@ add_entrypoint_object(
libc.hdr.wchar_macros
libc.src.__support.wctype_utils
)
+
+add_entrypoint_object(
+ wcsstr
+ SRCS
+ wcsstr.cpp
+ HDRS
+ wcsstr.h
+ DEPENDS
+ libc.hdr.types.size_t
+ libc.hdr.wchar_macros
+ libc.src.string.string_utils
+)
diff --git a/libc/src/wchar/wcsstr.cpp b/libc/src/wchar/wcsstr.cpp
new file mode 100644
index 0000000000000..312352a3ad5a8
--- /dev/null
+++ b/libc/src/wchar/wcsstr.cpp
@@ -0,0 +1,39 @@
+//===-- Implementation of wcsstr ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsstr.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/string/string_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(const wchar_t *, wcsstr,
+ (const wchar_t *s1, const wchar_t *s2)) { // first match of s2 in s1, or nullptr
+ size_t s1_len = internal::string_length(s1); // haystack length
+ size_t s2_len = internal::string_length(s2); // needle length
+ // An empty needle matches at the very start of the haystack, so return s1.
+ if (s2_len == 0)
+ return s1;
+ // A longer needle cannot match; this also guards the subtraction below.
+ if (s2_len > s1_len)
+ return nullptr;
+ for (size_t i = 0; i <= (s1_len - s2_len); ++i) { // i: candidate start in s1
+ size_t j = 0;
+ for (; j < s2_len && s1[i + j] == s2[j]; ++j) // count matching characters
+ ;
+ if (j == s2_len) // the whole needle matched at offset i
+ return (s1 + i);
+ }
+ return nullptr;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsstr.h b/libc/src/wchar/wcsstr.h
new file mode 100644
index 0000000000000..af054d8495a4b
--- /dev/null
+++ b/libc/src/wchar/wcsstr.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for wcsstr ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSSTR_H
+#define LLVM_LIBC_SRC_WCHAR_WCSSTR_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+const wchar_t *wcsstr(const wchar_t *s1, const wchar_t *s2); // find s2 in s1
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSSTR_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 9bc230e0bddf3..100be8226fa61 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -94,3 +94,13 @@ add_libc_test(
DEPENDS
libc.src.wchar.wmemcpy
)
+
+add_libc_test(
+ wcsstr_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcsstr_test.cpp
+ DEPENDS
+ libc.src.wchar.wcsstr
+)
diff --git a/libc/test/src/wchar/wcsstr_test.cpp b/libc/test/src/wchar/wcsstr_test.cpp
new file mode 100644
index 0000000000000..c1448bbaf1abf
--- /dev/null
+++ b/libc/test/src/wchar/wcsstr_test.cpp
@@ -0,0 +1,113 @@
+//===-- Unittests for wcsstr ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcsstr.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWCSStrTest, NeedleNotInHaystack) {
+ // The needle never occurs in the haystack, so the result must be nullptr.
+ const wchar_t *haystack = L"12345";
+ const wchar_t *needle = L"a";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleIsEmptyString) {
+ // An empty needle matches immediately, so the haystack itself is returned.
+ const wchar_t *haystack = L"12345";
+ const wchar_t *needle = L"";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackIsEmptyString) {
+ // A non-empty needle cannot occur in an empty haystack: expect nullptr.
+ const wchar_t *needle = L"12345";
+ const wchar_t *haystack = L"";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackAndNeedleAreEmptyStrings) {
+ // The empty-needle rule still applies when the haystack is empty as well.
+ const wchar_t *needle = L"";
+ const wchar_t *haystack = L"";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackAndNeedleAreSingleCharacters) {
+ const wchar_t *haystack = L"a";
+ // Same single character: the match is at the start of the haystack.
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"a"), haystack);
+ // Different single character: no match, so nullptr.
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"b"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleEqualToHaystack) {
+ const wchar_t *haystack = L"12345";
+ // Identical strings: the whole haystack is the match, starting at offset 0.
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"12345"), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleLargerThanHaystack) {
+ const wchar_t *haystack = L"123";
+ // The needle is longer than the haystack, so it cannot match: nullptr.
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"12345"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleAtBeginning) {
+ const wchar_t *haystack = L"12345";
+ const wchar_t *needle = L"12";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack); // offset 0
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleInMiddle) {
+ const wchar_t *haystack = L"abcdefghi";
+ const wchar_t *needle = L"def";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 3); // d is index 3
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleDirectlyBeforeNullTerminator) {
+ const wchar_t *haystack = L"abcdefghi";
+ const wchar_t *needle = L"ghi"; // the last three characters of the haystack
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 6); // g is index 6
+}
+
+TEST(LlvmLibcWCSStrTest, NeedlePastNullTerminator) {
+ const wchar_t haystack[5] = {L'1', L'2', L'\0', L'3', L'4'};
+ // The search must stop at the embedded null; 3 and 4 lie beyond it.
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, /*needle=*/L"3"), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, /*needle=*/L"4"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, PartialNeedle) {
+ const wchar_t *haystack = L"la_ap_lap";
+ const wchar_t *needle = L"lap";
+ // The partial matches la and ap must be skipped; lap starts at index 6.
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 6);
+}
+
+TEST(LlvmLibcWCSStrTest, MisspelledNeedle) {
+ const wchar_t *haystack = L"atalloftwocities...wait, tale"; // tall at index 1 is a near miss
+ const wchar_t *needle = L"tale";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 25); // the final word
+}
+
+TEST(LlvmLibcWCSStrTest, AnagramNeedle) {
+ const wchar_t *haystack = L"dgo_ogd_god_odg_gdo_dog"; // five anagrams, then dog
+ const wchar_t *needle = L"dog";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 20); // last group only
+}
+
+TEST(LlvmLibcWCSStrTest, MorphedNeedle) {
+ // Each failing needle below differs from the word time in exactly one letter.
+ const wchar_t *haystack = L"once upon a time";
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"time"), haystack + 12); // exact match
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"lime"), nullptr); // 1st letter differs
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"tome"), nullptr); // 2nd letter differs
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"tire"), nullptr); // 3rd letter differs
+ ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"timo"), nullptr); // 4th letter differs
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/142440
More information about the libc-commits
mailing list