[libc-commits] [libc] e3d1a33 - [libc] wcsstr implementation (#142440)

via libc-commits libc-commits at lists.llvm.org
Mon Jun 2 16:05:15 PDT 2025


Author: sribee8
Date: 2025-06-02T16:05:12-07:00
New Revision: e3d1a33b7ef6c0f0a27ae7cc5a0b4a2572a392c4

URL: https://github.com/llvm/llvm-project/commit/e3d1a33b7ef6c0f0a27ae7cc5a0b4a2572a392c4
DIFF: https://github.com/llvm/llvm-project/commit/e3d1a33b7ef6c0f0a27ae7cc5a0b4a2572a392c4.diff

LOG: [libc] wcsstr implementation (#142440)

Implemented wcsstr and tests.
fixes #124348

---------

Co-authored-by: Sriya Pratipati <sriyap at google.com>

Added: 
    libc/src/wchar/wcsstr.cpp
    libc/src/wchar/wcsstr.h
    libc/test/src/wchar/wcsstr_test.cpp

Modified: 
    libc/config/linux/x86_64/entrypoints.txt
    libc/include/wchar.yaml
    libc/src/wchar/CMakeLists.txt
    libc/test/src/wchar/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 43f03f6424177..c883cb6a908e7 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -373,6 +373,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.wchar.wmemcmp
     libc.src.wchar.wmempcpy
     libc.src.wchar.wmemcpy
+    libc.src.wchar.wcsstr
     libc.src.wchar.wcsncat
     libc.src.wchar.wcscpy
 

diff  --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 92ae060d3dc72..2eaefbf8990f5 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -94,6 +94,13 @@ functions:
       - type: __restrict wchar_t *
       - type: const __restrict wchar_t *
       - type: size_t
+  - name: wcsstr
+    standards:
+      - stdc
+    return_type: const wchar_t *
+    arguments: 
+      - type: const wchar_t *
+      - type: const wchar_t *
   - name: wcsncat
     standards:
       - stdc

diff  --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 0f5b7470ada8e..17b5500660054 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -126,6 +126,18 @@ add_entrypoint_object(
     libc.src.__support.wctype_utils
 )
 
+add_entrypoint_object(
+  wcsstr
+  SRCS
+    wcsstr.cpp
+  HDRS
+    wcsstr.h
+  DEPENDS
+    libc.hdr.types.size_t
+    libc.hdr.wchar_macros
+    libc.src.string.string_utils
+)
+
 add_entrypoint_object(
   wcsncat
   SRCS

diff  --git a/libc/src/wchar/wcsstr.cpp b/libc/src/wchar/wcsstr.cpp
new file mode 100644
index 0000000000000..961835ae39659
--- /dev/null
+++ b/libc/src/wchar/wcsstr.cpp
@@ -0,0 +1,38 @@
+//===-- Implementation of wcsstr ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsstr.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/string/string_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(const wchar_t *, wcsstr,
+                   (const wchar_t *s1, const wchar_t *s2)) {
+  size_t s1_len = internal::string_length(s1);
+  size_t s2_len = internal::string_length(s2);
+  if (s2_len == 0)
+    return s1; // An empty needle matches at the start of s1.
+  if (s2_len > s1_len) // Also keeps s1_len - s2_len from wrapping below.
+    return nullptr;
+  for (size_t i = 0; i <= (s1_len - s2_len); ++i) {
+    size_t j = 0;
+    // Advance j while s2[j] matches s1[i + j]; j == s2_len means full match.
+    for (; j < s2_len && s1[i + j] == s2[j]; ++j)
+      ;
+    if (j == s2_len)
+      return (s1 + i);
+  }
+  return nullptr;
+}
+
+} // namespace LIBC_NAMESPACE_DECL

diff  --git a/libc/src/wchar/wcsstr.h b/libc/src/wchar/wcsstr.h
new file mode 100644
index 0000000000000..af054d8495a4b
--- /dev/null
+++ b/libc/src/wchar/wcsstr.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for wcsstr ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSSTR_H
+#define LLVM_LIBC_SRC_WCHAR_WCSSTR_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+const wchar_t *wcsstr(const wchar_t *s1, const wchar_t *s2);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSSTR_H

diff  --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index fe0c2afa0491b..7743b0f025448 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -125,6 +125,16 @@ add_libc_test(
     libc.src.wchar.wmemcpy
 )
 
+add_libc_test(
+  wcsstr_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcsstr_test.cpp
+  DEPENDS
+    libc.src.wchar.wcsstr
+)
+
 add_libc_test(
   wcsncat_test
   SUITE

diff  --git a/libc/test/src/wchar/wcsstr_test.cpp b/libc/test/src/wchar/wcsstr_test.cpp
new file mode 100644
index 0000000000000..c1448bbaf1abf
--- /dev/null
+++ b/libc/test/src/wchar/wcsstr_test.cpp
@@ -0,0 +1,113 @@
+//===-- Unittests for wcsstr ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcsstr.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWCSStrTest, NeedleNotInHaystack) {
+  // Should return nullptr when the needle does not occur in the haystack.
+  const wchar_t *haystack = L"12345";
+  const wchar_t *needle = L"a";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleIsEmptyString) {
+  // An empty needle matches at the very start of the haystack.
+  const wchar_t *haystack = L"12345";
+  const wchar_t *needle = L"";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackIsEmptyString) {
+  // A non-empty needle cannot occur in an empty haystack: expect nullptr.
+  const wchar_t *needle = L"12345";
+  const wchar_t *haystack = L"";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackAndNeedleAreEmptyStrings) {
+  // An empty needle matches even an empty haystack, at its start.
+  const wchar_t *needle = L"";
+  const wchar_t *haystack = L"";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackAndNeedleAreSingleCharacters) {
+  const wchar_t *haystack = L"a";
+  // Identical single characters: the match is at the start of the haystack.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"a"), haystack);
+  // Different single characters: there is no match.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"b"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleEqualToHaystack) {
+  const wchar_t *haystack = L"12345";
+  // A needle equal to the whole haystack matches at the start.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"12345"), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleLargerThanHaystack) {
+  const wchar_t *haystack = L"123";
+  // A needle longer than the haystack can never match, even as a prefix.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"12345"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleAtBeginning) {
+  const wchar_t *haystack = L"12345";
+  const wchar_t *needle = L"12"; // A proper prefix of the haystack.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleInMiddle) {
+  const wchar_t *haystack = L"abcdefghi";
+  const wchar_t *needle = L"def"; // Begins at haystack index 3.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 3);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleDirectlyBeforeNullTerminator) {
+  const wchar_t *haystack = L"abcdefghi";
+  const wchar_t *needle = L"ghi"; // Final three characters of the haystack.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 6);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedlePastNullTerminator) {
+  const wchar_t haystack[5] = {L'1', L'2', L'\0', L'3', L'4'};
+  // Characters stored after the first L'\0' are not part of the haystack.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, /*needle=*/L"3"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, /*needle=*/L"4"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, PartialNeedle) {
+  const wchar_t *haystack = L"la_ap_lap";
+  const wchar_t *needle = L"lap";
+  // The partial matches "la" and "ap" must be skipped; only "lap" counts.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 6);
+}
+
+TEST(LlvmLibcWCSStrTest, MisspelledNeedle) {
+  const wchar_t *haystack = L"atalloftwocities...wait, tale";
+  const wchar_t *needle = L"tale"; // "tall" at index 1 is only a near miss.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 25);
+}
+
+TEST(LlvmLibcWCSStrTest, AnagramNeedle) {
+  const wchar_t *haystack = L"dgo_ogd_god_odg_gdo_dog";
+  const wchar_t *needle = L"dog"; // Earlier groups are permutations of it.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 20);
+}
+
+TEST(LlvmLibcWCSStrTest, MorphedNeedle) {
+  // Each needle after the first differs from "time" in exactly one letter.
+  const wchar_t *haystack = L"once upon a time";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"time"), haystack + 12);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"lime"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"tome"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"tire"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"timo"), nullptr);
+}


        


More information about the libc-commits mailing list