[libc-commits] [libc] wcsstr implementation (PR #142440)

via libc-commits libc-commits at lists.llvm.org
Mon Jun 2 13:31:48 PDT 2025


https://github.com/sribee8 updated https://github.com/llvm/llvm-project/pull/142440

>From f01229cd4b231165d641ecf0579b3db77f9f9cfc Mon Sep 17 00:00:00 2001
From: Sriya Pratipati <sriyap at google.com>
Date: Mon, 2 Jun 2025 17:29:28 +0000
Subject: [PATCH 1/3] [libc] wcsstr implementation

Implemented wcsstr and added unit tests for it.
---
 libc/config/linux/x86_64/entrypoints.txt |   1 +
 libc/include/wchar.yaml                  |   7 ++
 libc/src/wchar/CMakeLists.txt            |  12 +++
 libc/src/wchar/wcsstr.cpp                |  39 ++++++++
 libc/src/wchar/wcsstr.h                  |  21 +++++
 libc/test/src/wchar/CMakeLists.txt       |  10 ++
 libc/test/src/wchar/wcsstr_test.cpp      | 113 +++++++++++++++++++++++
 7 files changed, 203 insertions(+)
 create mode 100644 libc/src/wchar/wcsstr.cpp
 create mode 100644 libc/src/wchar/wcsstr.h
 create mode 100644 libc/test/src/wchar/wcsstr_test.cpp

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 545b9227349fe..d0ca7f2f7f39f 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -370,6 +370,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.wchar.wcsspn
     libc.src.wchar.wmemcmp
     libc.src.wchar.wmemcpy
+    libc.src.wchar.wcsstr
 
     # sys/uio.h entrypoints
     libc.src.sys.uio.writev
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index bfd9a10342019..d580de74cae41 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -72,3 +72,10 @@ functions:
       - type: __restrict wchar_t *
       - type: const __restrict wchar_t *
       - type: size_t
+  - name: wcsstr
+    standards:
+      - stdc
+    return_type: const wchar_t *
+    arguments: 
+      - type: const wchar_t *
+      - type: const wchar_t *
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 9db121762348b..cd5c34dce7acc 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -104,3 +104,15 @@ add_entrypoint_object(
     libc.hdr.wchar_macros
     libc.src.__support.wctype_utils
 )
+
+add_entrypoint_object(
+  wcsstr
+  SRCS
+    wcsstr.cpp
+  HDRS
+    wcsstr.h
+  DEPENDS
+    libc.hdr.types.size_t
+    libc.hdr.wchar_macros
+    libc.src.string.string_utils
+)
diff --git a/libc/src/wchar/wcsstr.cpp b/libc/src/wchar/wcsstr.cpp
new file mode 100644
index 0000000000000..312352a3ad5a8
--- /dev/null
+++ b/libc/src/wchar/wcsstr.cpp
@@ -0,0 +1,39 @@
+//===-- Implementation of wcsstr ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsstr.h"
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/string/string_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(const wchar_t *, wcsstr,
+                   (const wchar_t *s1, const wchar_t *s2)) {
+  size_t s1_len = internal::string_length(s1);
+  size_t s2_len = internal::string_length(s2);
+  // If string to be found has length 0, return s1.
+  if (s2_len == 0)
+    return s1;
+  // If string to be found has length longer than s1, return nullptr.
+  if (s2_len > s1_len)
+    return nullptr;
+  for (size_t i = 0; i <= (s1_len - s2_len); ++i) {
+    size_t j = 0;
+    for (; j < s2_len && s1[i + j] == s2[j]; ++j)
+      ;
+    if (j == s2_len)
+      return (s1 + i);
+  }
+  return nullptr;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsstr.h b/libc/src/wchar/wcsstr.h
new file mode 100644
index 0000000000000..af054d8495a4b
--- /dev/null
+++ b/libc/src/wchar/wcsstr.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for wcsstr ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSSTR_H
+#define LLVM_LIBC_SRC_WCHAR_WCSSTR_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+const wchar_t *wcsstr(const wchar_t *s1, const wchar_t *s2);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSSTR_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 9bc230e0bddf3..100be8226fa61 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -94,3 +94,13 @@ add_libc_test(
   DEPENDS
     libc.src.wchar.wmemcpy
 )
+
+add_libc_test(
+  wcsstr_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcsstr_test.cpp
+  DEPENDS
+    libc.src.wchar.wcsstr
+)
diff --git a/libc/test/src/wchar/wcsstr_test.cpp b/libc/test/src/wchar/wcsstr_test.cpp
new file mode 100644
index 0000000000000..c1448bbaf1abf
--- /dev/null
+++ b/libc/test/src/wchar/wcsstr_test.cpp
@@ -0,0 +1,113 @@
+//===-- Unittests for wcsstr ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcsstr.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWCSStrTest, NeedleNotInHaystack) {
+  // Should return nullptr if string is not found.
+  const wchar_t *haystack = L"12345";
+  const wchar_t *needle = L"a";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleIsEmptyString) {
+  // Should return pointer to first character if needle is empty.
+  const wchar_t *haystack = L"12345";
+  const wchar_t *needle = L"";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackIsEmptyString) {
+  // Should return nullptr since haystack is empty.
+  const wchar_t *needle = L"12345";
+  const wchar_t *haystack = L"";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackAndNeedleAreEmptyStrings) {
+  // Should point to haystack since needle is empty.
+  const wchar_t *needle = L"";
+  const wchar_t *haystack = L"";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, HaystackAndNeedleAreSingleCharacters) {
+  const wchar_t *haystack = L"a";
+  // Should point to haystack.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"a"), haystack);
+  // Should return nullptr.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"b"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleEqualToHaystack) {
+  const wchar_t *haystack = L"12345";
+  // Should point to haystack.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"12345"), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleLargerThanHaystack) {
+  const wchar_t *haystack = L"123";
+  // Should return nullptr.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"12345"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleAtBeginning) {
+  const wchar_t *haystack = L"12345";
+  const wchar_t *needle = L"12";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleInMiddle) {
+  const wchar_t *haystack = L"abcdefghi";
+  const wchar_t *needle = L"def";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 3);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedleDirectlyBeforeNullTerminator) {
+  const wchar_t *haystack = L"abcdefghi";
+  const wchar_t *needle = L"ghi";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 6);
+}
+
+TEST(LlvmLibcWCSStrTest, NeedlePastNullTerminator) {
+  const wchar_t haystack[5] = {L'1', L'2', L'\0', L'3', L'4'};
+  // Shouldn't find anything after the null terminator.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, /*needle=*/L"3"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, /*needle=*/L"4"), nullptr);
+}
+
+TEST(LlvmLibcWCSStrTest, PartialNeedle) {
+  const wchar_t *haystack = L"la_ap_lap";
+  const wchar_t *needle = L"lap";
+  // Shouldn't find la or ap.
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 6);
+}
+
+TEST(LlvmLibcWCSStrTest, MisspelledNeedle) {
+  const wchar_t *haystack = L"atalloftwocities...wait, tale";
+  const wchar_t *needle = L"tale";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 25);
+}
+
+TEST(LlvmLibcWCSStrTest, AnagramNeedle) {
+  const wchar_t *haystack = L"dgo_ogd_god_odg_gdo_dog";
+  const wchar_t *needle = L"dog";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, needle), haystack + 20);
+}
+
+TEST(LlvmLibcWCSStrTest, MorphedNeedle) {
+  // Changes a single letter in the needle to mismatch with the haystack.
+  const wchar_t *haystack = L"once upon a time";
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"time"), haystack + 12);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"lime"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"tome"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"tire"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::wcsstr(haystack, L"timo"), nullptr);
+}

>From 85f23172eb5341e97e3fced927fe26374b8557be Mon Sep 17 00:00:00 2001
From: Sriya Pratipati <sriyap at google.com>
Date: Mon, 2 Jun 2025 20:15:43 +0000
Subject: [PATCH 2/3] Added comments for clarity

---
 libc/src/wchar/wcsstr.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libc/src/wchar/wcsstr.cpp b/libc/src/wchar/wcsstr.cpp
index 312352a3ad5a8..ef91ba1e14eb4 100644
--- a/libc/src/wchar/wcsstr.cpp
+++ b/libc/src/wchar/wcsstr.cpp
@@ -26,10 +26,13 @@ LLVM_LIBC_FUNCTION(const wchar_t *, wcsstr,
   // If string to be found has length longer than s1, return nullptr.
   if (s2_len > s1_len)
     return nullptr;
+  // Checking for a match
   for (size_t i = 0; i <= (s1_len - s2_len); ++i) {
     size_t j = 0;
+    // j will increment until the characters don't match or end of string.
     for (; j < s2_len && s1[i + j] == s2[j]; ++j)
       ;
+    // Checking to see if we reached end of string.
     if (j == s2_len)
       return (s1 + i);
   }

>From 4d41c070428845b27a2a165335ff8be98c12887d Mon Sep 17 00:00:00 2001
From: Sriya Pratipati <sriyap at google.com>
Date: Mon, 2 Jun 2025 20:31:30 +0000
Subject: [PATCH 3/3] Removed unnecessary comments

---
 libc/src/wchar/wcsstr.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/libc/src/wchar/wcsstr.cpp b/libc/src/wchar/wcsstr.cpp
index ef91ba1e14eb4..961835ae39659 100644
--- a/libc/src/wchar/wcsstr.cpp
+++ b/libc/src/wchar/wcsstr.cpp
@@ -20,19 +20,15 @@ LLVM_LIBC_FUNCTION(const wchar_t *, wcsstr,
                    (const wchar_t *s1, const wchar_t *s2)) {
   size_t s1_len = internal::string_length(s1);
   size_t s2_len = internal::string_length(s2);
-  // If string to be found has length 0, return s1.
   if (s2_len == 0)
     return s1;
-  // If string to be found has length longer than s1, return nullptr.
   if (s2_len > s1_len)
     return nullptr;
-  // Checking for a match
   for (size_t i = 0; i <= (s1_len - s2_len); ++i) {
     size_t j = 0;
     // j will increment until the characters don't match or end of string.
     for (; j < s2_len && s1[i + j] == s2[j]; ++j)
       ;
-    // Checking to see if we reached end of string.
     if (j == s2_len)
       return (s1 + i);
   }



More information about the libc-commits mailing list