[libc-commits] [libc] [libc] Implemented wcspbrk (PR #142040)

Uzair Nawaz via libc-commits libc-commits at lists.llvm.org
Fri May 30 11:27:02 PDT 2025


https://github.com/uzairnawaz updated https://github.com/llvm/llvm-project/pull/142040

>From 7dd7eda3bc23d97a05dcf0e1accb21812ed3b2c2 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 29 May 2025 21:11:52 +0000
Subject: [PATCH 1/4] implemented wcspbrk; added tests

---
 libc/config/linux/x86_64/entrypoints.txt |  1 +
 libc/include/wchar.yaml                  |  7 ++
 libc/src/wchar/CMakeLists.txt            | 11 ++++
 libc/src/wchar/wcspbrk.cpp               | 27 ++++++++
 libc/src/wchar/wcspbrk.h                 | 21 ++++++
 libc/test/src/wchar/CMakeLists.txt       | 10 +++
 libc/test/src/wchar/wcspbrk_test.cpp     | 83 ++++++++++++++++++++++++
 7 files changed, 160 insertions(+)
 create mode 100644 libc/src/wchar/wcspbrk.cpp
 create mode 100644 libc/src/wchar/wcspbrk.h
 create mode 100644 libc/test/src/wchar/wcspbrk_test.cpp

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 52e746e32a1cd..593117f94b64e 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -365,6 +365,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.wchar.wcslen
     libc.src.wchar.wctob
     libc.src.wchar.wcschr
+    libc.src.wchar.wcspbrk
 
     # sys/uio.h entrypoints
     libc.src.sys.uio.writev
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 987bdc0b806dc..52ebc6804d043 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -34,3 +34,10 @@ functions:
     arguments: 
       - type: const wchar_t *
       - type: wchar_t
+  - name: wcspbrk
+    standards:
+      - stdc
+    return_type: const wchar_t *
+    arguments:
+      - type: const wchar_t *
+      - type: const wchar_t *
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 41034cab16d4d..4d923c5f14474 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -44,3 +44,14 @@ add_entrypoint_object(
     libc.hdr.wchar_macros
     libc.src.__support.wctype_utils
 )
+
+add_entrypoint_object(
+  wcspbrk
+  SRCS
+    wcspbrk.cpp
+  HDRS
+    wcspbrk.h
+  DEPENDS
+    libc.hdr.types.wchar_t
+    libc.src.__support.common
+)
diff --git a/libc/src/wchar/wcspbrk.cpp b/libc/src/wchar/wcspbrk.cpp
new file mode 100644
index 0000000000000..e92be0853a5c0
--- /dev/null
+++ b/libc/src/wchar/wcspbrk.cpp
@@ -0,0 +1,27 @@
+//===-- Implementation of wcspbrk -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcspbrk.h"
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk,
+                   (const wchar_t *src, const wchar_t *breakset)) {
+  // currently O(n * m), can be further optimized to O(n + m) with a hash set
+  for (int src_idx = 0; src[src_idx] != 0; src_idx++)
+    for (int breakset_idx = 0; breakset[breakset_idx] != 0; breakset_idx++)
+      if (src[src_idx] == breakset[breakset_idx])
+        return src + src_idx;
+
+  return nullptr;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcspbrk.h b/libc/src/wchar/wcspbrk.h
new file mode 100644
index 0000000000000..531651b0b723a
--- /dev/null
+++ b/libc/src/wchar/wcspbrk.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for wcspbrk ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSPBRK_H
+#define LLVM_LIBC_SRC_WCHAR_WCSPBRK_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+const wchar_t *wcspbrk(const wchar_t *src, const wchar_t *breakset);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSPBRK_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 7d64dfeb13b6e..0e3d5c618ae1a 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -42,3 +42,13 @@ add_libc_test(
   DEPENDS
     libc.src.wchar.wcschr
 )
+
+add_libc_test(
+  wcspbrk_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcspbrk_test.cpp
+  DEPENDS
+    libc.src.wchar.wcspbrk
+)
diff --git a/libc/test/src/wchar/wcspbrk_test.cpp b/libc/test/src/wchar/wcspbrk_test.cpp
new file mode 100644
index 0000000000000..ac7ac473542d9
--- /dev/null
+++ b/libc/test/src/wchar/wcspbrk_test.cpp
@@ -0,0 +1,83 @@
+//===-- Unittests for wcspbrk
+//----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcspbrk.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWCSPBrkTest, EmptyStringShouldReturnNullptr) {
+  // Null terminators never count as a match, whether src or breakset is empty.
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(L"", L""), nullptr);
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(L"_", L""), nullptr);
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(L"", L"_"), nullptr);
+}
+
+TEST(LlvmLibcWCSPBrkTest, ShouldNotFindAnythingAfterNullTerminator) {
+  const wchar_t hidden_c[4] = {L'a', L'b', L'\0', L'c'};
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(hidden_c, L"c"), nullptr);
+}
+
+TEST(LlvmLibcWCSPBrkTest, ShouldReturnNullptrIfNoCharactersFound) {
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(L"12345", L"abcdef"), nullptr);
+}
+
+TEST(LlvmLibcWCSPBrkTest, FindsFirstCharacter) {
+  const wchar_t *src = L"12345";
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"1"), src);
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"-1"), src);
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"1_"), src);
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"f1_"), src);
+
+  EXPECT_TRUE(src[0] == L'1');
+  EXPECT_TRUE(src[1] == L'2');
+  EXPECT_TRUE(src[2] == L'3');
+  EXPECT_TRUE(src[3] == L'4');
+  EXPECT_TRUE(src[4] == L'5');
+  EXPECT_TRUE(src[5] == 0);
+}
+
+TEST(LlvmLibcWCSPBrkTest, FindsMiddleCharacter) {
+  const wchar_t *src = L"12345";
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"3"), src + 2);
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"?3"), src + 2);
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"3F"), src + 2);
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"z3_"), src + 2);
+
+  EXPECT_TRUE(src[0] == L'1');
+  EXPECT_TRUE(src[1] == L'2');
+  EXPECT_TRUE(src[2] == L'3');
+  EXPECT_TRUE(src[3] == L'4');
+  EXPECT_TRUE(src[4] == L'5');
+  EXPECT_TRUE(src[5] == 0);
+}
+
+TEST(LlvmLibcWCSPBrkTest, FindsLastCharacter) {
+  const wchar_t *src = L"12345";
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"5"), src + 4);
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"r5"), src + 4);
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"59"), src + 4);
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"n5_"), src + 4);
+
+  EXPECT_TRUE(src[0] == L'1');
+  EXPECT_TRUE(src[1] == L'2');
+  EXPECT_TRUE(src[2] == L'3');
+  EXPECT_TRUE(src[3] == L'4');
+  EXPECT_TRUE(src[4] == L'5');
+  EXPECT_TRUE(src[5] == 0);
+}
+
+TEST(LlvmLibcWCSPBrkTest, FindsFirstOfRepeated) {
+  const wchar_t *src = L"A,B,C,D";
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L","), src + 1);
+}
+
+TEST(LlvmLibcWCSPBrkTest, FindsFirstInBreakset) {
+  const wchar_t *src = L"12345";
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"34"), src + 2);
+}

>From 379e86ee9384c63c95c091df657e7db33e602808 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 29 May 2025 23:07:40 +0000
Subject: [PATCH 2/4] updated tests

---
 libc/test/src/wchar/wcspbrk_test.cpp | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/libc/test/src/wchar/wcspbrk_test.cpp b/libc/test/src/wchar/wcspbrk_test.cpp
index ac7ac473542d9..af78624027605 100644
--- a/libc/test/src/wchar/wcspbrk_test.cpp
+++ b/libc/test/src/wchar/wcspbrk_test.cpp
@@ -33,13 +33,6 @@ TEST(LlvmLibcWCSPBrkTest, FindsFirstCharacter) {
   EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"-1"), src);
   EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"1_"), src);
   EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"f1_"), src);
-
-  EXPECT_TRUE(src[0] == L'1');
-  EXPECT_TRUE(src[1] == L'2');
-  EXPECT_TRUE(src[2] == L'3');
-  EXPECT_TRUE(src[3] == L'4');
-  EXPECT_TRUE(src[4] == L'5');
-  EXPECT_TRUE(src[5] == 0);
 }
 
 TEST(LlvmLibcWCSPBrkTest, FindsMiddleCharacter) {
@@ -48,13 +41,6 @@ TEST(LlvmLibcWCSPBrkTest, FindsMiddleCharacter) {
   EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"?3"), src + 2);
   EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"3F"), src + 2);
   EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"z3_"), src + 2);
-
-  EXPECT_TRUE(src[0] == L'1');
-  EXPECT_TRUE(src[1] == L'2');
-  EXPECT_TRUE(src[2] == L'3');
-  EXPECT_TRUE(src[3] == L'4');
-  EXPECT_TRUE(src[4] == L'5');
-  EXPECT_TRUE(src[5] == 0);
 }
 
 TEST(LlvmLibcWCSPBrkTest, FindsLastCharacter) {
@@ -63,13 +49,6 @@ TEST(LlvmLibcWCSPBrkTest, FindsLastCharacter) {
   EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"r5"), src + 4);
   EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"59"), src + 4);
   EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"n5_"), src + 4);
-
-  EXPECT_TRUE(src[0] == L'1');
-  EXPECT_TRUE(src[1] == L'2');
-  EXPECT_TRUE(src[2] == L'3');
-  EXPECT_TRUE(src[3] == L'4');
-  EXPECT_TRUE(src[4] == L'5');
-  EXPECT_TRUE(src[5] == 0);
 }
 
 TEST(LlvmLibcWCSPBrkTest, FindsFirstOfRepeated) {
@@ -80,4 +59,5 @@ TEST(LlvmLibcWCSPBrkTest, FindsFirstOfRepeated) {
 TEST(LlvmLibcWCSPBrkTest, FindsFirstInBreakset) {
   const wchar_t *src = L"12345";
   EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"34"), src + 2);
+  EXPECT_EQ(LIBC_NAMESPACE::wcspbrk(src, L"43"), src + 2);
 }

>From d7870088b93c7462c3b35523f5c4af3e2a4dcf1c Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 30 May 2025 18:08:09 +0000
Subject: [PATCH 3/4] created helper function for test; fixed formatting

---
 libc/src/wchar/wcspbrk.cpp           | 13 ++++++++++---
 libc/test/src/wchar/wcspbrk_test.cpp |  3 +--
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/libc/src/wchar/wcspbrk.cpp b/libc/src/wchar/wcspbrk.cpp
index e92be0853a5c0..65947dd199e57 100644
--- a/libc/src/wchar/wcspbrk.cpp
+++ b/libc/src/wchar/wcspbrk.cpp
@@ -13,13 +13,20 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
+bool contains_char(const wchar_t *str, wchar_t target) {
+  for (; *str != 0; str++)
+    if (*str == target)
+      return true;
+
+  return false;
+}
+
 LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk,
                    (const wchar_t *src, const wchar_t *breakset)) {
   // currently O(n * m), can be further optimized to O(n + m) with a hash set
   for (int src_idx = 0; src[src_idx] != 0; src_idx++)
-    for (int breakset_idx = 0; breakset[breakset_idx] != 0; breakset_idx++)
-      if (src[src_idx] == breakset[breakset_idx])
-        return src + src_idx;
+    if (contains_char(breakset, src[src_idx]))
+      return src + src_idx;
 
   return nullptr;
 }
diff --git a/libc/test/src/wchar/wcspbrk_test.cpp b/libc/test/src/wchar/wcspbrk_test.cpp
index af78624027605..f7754c0b324e9 100644
--- a/libc/test/src/wchar/wcspbrk_test.cpp
+++ b/libc/test/src/wchar/wcspbrk_test.cpp
@@ -1,5 +1,4 @@
-//===-- Unittests for wcspbrk
-//----------------------------------------------===//
+//===-- Unittests for wcspbrk ---------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

>From 85dec8595d1bf31a1b81557b93a1ea8d484a4d3a Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 30 May 2025 18:26:19 +0000
Subject: [PATCH 4/4] fixed loop condition in contains_char

---
 libc/src/wchar/wcspbrk.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/wchar/wcspbrk.cpp b/libc/src/wchar/wcspbrk.cpp
index 65947dd199e57..bf305a5dbe125 100644
--- a/libc/src/wchar/wcspbrk.cpp
+++ b/libc/src/wchar/wcspbrk.cpp
@@ -14,7 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 bool contains_char(const wchar_t *str, wchar_t target) {
-  for (; *str != 0; str++)
+  for (; *str != L'\0'; str++)
     if (*str == target)
       return true;
 



More information about the libc-commits mailing list