[libc-commits] [libc] e3d1a33 - [libc] wcsstr implementation (#142440)
via libc-commits
libc-commits at lists.llvm.org
Mon Jun 2 16:05:15 PDT 2025
Author: sribee8
Date: 2025-06-02T16:05:12-07:00
New Revision: e3d1a33b7ef6c0f0a27ae7cc5a0b4a2572a392c4
URL: https://github.com/llvm/llvm-project/commit/e3d1a33b7ef6c0f0a27ae7cc5a0b4a2572a392c4
DIFF: https://github.com/llvm/llvm-project/commit/e3d1a33b7ef6c0f0a27ae7cc5a0b4a2572a392c4.diff
LOG: [libc] wcsstr implementation (#142440)
Implemented wcsstr and tests.
fixes #124348
---------
Co-authored-by: Sriya Pratipati <sriyap at google.com>
Added:
libc/src/wchar/wcsstr.cpp
libc/src/wchar/wcsstr.h
libc/test/src/wchar/wcsstr_test.cpp
Modified:
libc/config/linux/x86_64/entrypoints.txt
libc/include/wchar.yaml
libc/src/wchar/CMakeLists.txt
libc/test/src/wchar/CMakeLists.txt
Removed:
################################################################################
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 43f03f6424177..c883cb6a908e7 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -373,6 +373,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.wchar.wmemcmp
libc.src.wchar.wmempcpy
libc.src.wchar.wmemcpy
+ libc.src.wchar.wcsstr
libc.src.wchar.wcsncat
libc.src.wchar.wcscpy
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 92ae060d3dc72..2eaefbf8990f5 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -94,6 +94,13 @@ functions:
- type: __restrict wchar_t *
- type: const __restrict wchar_t *
- type: size_t
+ - name: wcsstr
+ standards:
+ - stdc
+ return_type: const wchar_t *
+ arguments:
+ - type: const wchar_t *
+ - type: const wchar_t *
- name: wcsncat
standards:
- stdc
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 0f5b7470ada8e..17b5500660054 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -126,6 +126,18 @@ add_entrypoint_object(
libc.src.__support.wctype_utils
)
+add_entrypoint_object(
+ wcsstr
+ SRCS
+ wcsstr.cpp
+ HDRS
+ wcsstr.h
+ DEPENDS
+ libc.hdr.types.size_t
+ libc.hdr.wchar_macros
+ libc.src.string.string_utils
+)
+
add_entrypoint_object(
wcsncat
SRCS
diff --git a/libc/src/wchar/wcsstr.cpp b/libc/src/wchar/wcsstr.cpp
new file mode 100644
index 0000000000000..961835ae39659
--- /dev/null
+++ b/libc/src/wchar/wcsstr.cpp
@@ -0,0 +1,38 @@
+//===-- Implementation of wcsstr ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsstr.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/string/string_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(const wchar_t *, wcsstr,
+                   (const wchar_t *s1, const wchar_t *s2)) {
+  // Returns the first occurrence of s2 within s1, or nullptr if there is none.
+  const size_t haystack_len = internal::string_length(s1);
+  const size_t needle_len = internal::string_length(s2);
+  if (needle_len == 0)
+    return s1; // An empty needle matches at the start of the haystack.
+  if (needle_len > haystack_len)
+    return nullptr; // The needle cannot fit inside the haystack.
+  for (size_t start = 0; start <= haystack_len - needle_len; ++start) {
+    size_t matched = 0; // Length of the needle prefix matching at start.
+    while (matched < needle_len && s1[start + matched] == s2[matched])
+      ++matched;
+    if (matched == needle_len)
+      return s1 + start;
+  }
+  return nullptr;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsstr.h b/libc/src/wchar/wcsstr.h
new file mode 100644
index 0000000000000..af054d8495a4b
--- /dev/null
+++ b/libc/src/wchar/wcsstr.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for wcsstr ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSSTR_H
+#define LLVM_LIBC_SRC_WCHAR_WCSSTR_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+const wchar_t *wcsstr(const wchar_t *s1, const wchar_t *s2);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSSTR_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index fe0c2afa0491b..7743b0f025448 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -125,6 +125,16 @@ add_libc_test(
libc.src.wchar.wmemcpy
)
+add_libc_test(
+ wcsstr_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcsstr_test.cpp
+ DEPENDS
+ libc.src.wchar.wcsstr
+)
+
add_libc_test(
wcsncat_test
SUITE
diff --git a/libc/test/src/wchar/wcsstr_test.cpp b/libc/test/src/wchar/wcsstr_test.cpp
new file mode 100644
index 0000000000000..c1448bbaf1abf
--- /dev/null
+++ b/libc/test/src/wchar/wcsstr_test.cpp
@@ -0,0 +1,113 @@
+//===-- Unittests for wcsstr ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcsstr.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWCSStrTest, NeedleNotInHaystack) {
+  // No character of the haystack matches the needle, so expect nullptr.
+  const wchar_t *pattern = L"a";
+  const wchar_t *text = L"12345";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(text, pattern), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleIsEmptyString) {
+  // An empty needle matches at the very start of the haystack.
+  const wchar_t *empty_needle = L"";
+  const wchar_t *text = L"12345";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(text, empty_needle), text);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackIsEmptyString) {
+  // A non-empty needle can never be found in an empty haystack.
+  const wchar_t *empty_text = L"";
+  const wchar_t *pattern = L"12345";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(empty_text, pattern), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackAndNeedleAreEmptyStrings) {
+  // The empty needle still matches, so the haystack itself is returned.
+  const wchar_t *empty_text = L"";
+  const wchar_t *empty_pattern = L"";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(empty_text, empty_pattern), empty_text);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackAndNeedleAreSingleCharacters) {
+  const wchar_t *single = L"a";
+  // The identical character matches at the start of the haystack.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(single, L"a"), single);
+  // A different character is never found.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(single, L"b"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleEqualToHaystack) {
+  const wchar_t *text = L"12345";
+  // A needle identical to the haystack matches at offset zero.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(text, L"12345"), text);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleLargerThanHaystack) {
+  const wchar_t *short_text = L"123";
+  // A needle longer than the haystack cannot match.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(short_text, L"12345"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleAtBeginning) {
+  const wchar_t *text = L"12345";
+  // The needle is a prefix of the haystack, so the match is at offset zero.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(text, L"12"), text);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleInMiddle) {
+  const wchar_t *alphabet = L"abcdefghi";
+  // "def" begins three characters into the haystack.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(alphabet, L"def"), alphabet + 3);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleDirectlyBeforeNullTerminator) {
+  const wchar_t *alphabet = L"abcdefghi";
+  // "ghi" occupies the last three characters, ending at the terminator.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(alphabet, L"ghi"), alphabet + 6);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedlePastNullTerminator) {
+  // The search must stop at the embedded terminator and ignore what follows.
+  const wchar_t buffer[5] = {L'1', L'2', L'\0', L'3', L'4'};
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(buffer, /*needle=*/L"3"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(buffer, /*needle=*/L"4"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, PartialNeedle) {
+  // The fragments "la" and "ap" must be skipped; only the full "lap" counts.
+  const wchar_t *pattern = L"lap";
+  const wchar_t *text = L"la_ap_lap";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(text, pattern), text + 6);
+}
+
+TEST(LlvmLibcWCSStrTest, MisspelledNeedle) {
+  const wchar_t *text = L"atalloftwocities...wait, tale";
+  // "tall" near the start is not "tale"; the real match is at index 25.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(text, L"tale"), text + 25);
+}
+
+TEST(LlvmLibcWCSStrTest, AnagramNeedle) {
+  const wchar_t *text = L"dgo_ogd_god_odg_gdo_dog";
+  // Every earlier group only permutes "dog"; just the last one matches.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(text, L"dog"), text + 20);
+}
+
+TEST(LlvmLibcWCSStrTest, MorphedNeedle) {
+  const wchar_t *sentence = L"once upon a time";
+  // The exact word matches; altering any single letter of it must not.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(sentence, L"time"), sentence + 12);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(sentence, L"lime"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(sentence, L"tome"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(sentence, L"tire"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(sentence, L"timo"), nullptr);
+}
More information about the libc-commits
mailing list