[libc-commits] [libc] [libc] Stop duplicating wcschr(). (PR #150661)

via libc-commits libc-commits at lists.llvm.org
Fri Jul 25 11:52:19 PDT 2025


https://github.com/enh-google updated https://github.com/llvm/llvm-project/pull/150661

>From 59207f0e108ab95a8987b06d0c453d02846d6eb2 Mon Sep 17 00:00:00 2001
From: enh-google <enh at google.com>
Date: Fri, 25 Jul 2025 13:16:35 -0400
Subject: [PATCH 1/5] [libc] Stop duplicating wcschr().

---
 libc/src/wchar/wchar_utils.h | 11 ++++-------
 libc/src/wchar/wcschr.cpp    |  7 ++-----
 libc/src/wchar/wcspbrk.cpp   | 10 +---------
 libc/src/wchar/wcstok.cpp    | 13 +++----------
 4 files changed, 10 insertions(+), 31 deletions(-)

diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h
index e0218c7d89b1f..d69638fa71912 100644
--- a/libc/src/wchar/wchar_utils.h
+++ b/libc/src/wchar/wchar_utils.h
@@ -17,13 +17,10 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
-// returns true if the character exists in the string
-LIBC_INLINE static bool wcschr(wchar_t c, const wchar_t *str) {
-  for (int n = 0; str[n]; ++n) {
-    if (str[n] == c)
-      return true;
-  }
-  return false;
+LIBC_INLINE static wchar_t *wcschr(const wchar_t *s, wchar_t c) {
+  for (; *s && *s != c; ++s)
+    ;
+  return (*s == c) ? s : nullptr;
 }
 
 // bool should be true for wcscspn for complimentary span
diff --git a/libc/src/wchar/wcschr.cpp b/libc/src/wchar/wcschr.cpp
index defc2ce3c3b72..e53ec9a4c95ba 100644
--- a/libc/src/wchar/wcschr.cpp
+++ b/libc/src/wchar/wcschr.cpp
@@ -11,15 +11,12 @@
 #include "hdr/types/wchar_t.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
+#include "wchar_utils.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(const wchar_t *, wcschr, (const wchar_t *s, wchar_t c)) {
-  for (; *s && *s != c; ++s)
-    ;
-  if (*s == c)
-    return s;
-  return nullptr;
+  return internal::wcschr(s, c);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcspbrk.cpp b/libc/src/wchar/wcspbrk.cpp
index a00ba9979a489..9faa34fe5fd1a 100644
--- a/libc/src/wchar/wcspbrk.cpp
+++ b/libc/src/wchar/wcspbrk.cpp
@@ -14,14 +14,6 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-bool contains_char(const wchar_t *str, wchar_t target) {
-  for (; *str != L'\0'; str++)
-    if (*str == target)
-      return true;
-
-  return false;
-}
-
 LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk,
                    (const wchar_t *src, const wchar_t *breakset)) {
   LIBC_CRASH_ON_NULLPTR(src);
@@ -29,7 +21,7 @@ LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk,
 
   // currently O(n * m), can be further optimized to O(n + m) with a hash set
   for (int src_idx = 0; src[src_idx] != 0; src_idx++)
-    if (contains_char(breakset, src[src_idx]))
+    if (internal::wcschr(breakset, src[src_idx]))
       return src + src_idx;
 
   return nullptr;
diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index 291efc15e158a..32a500bf302c7 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -13,15 +13,8 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
-  for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr)
-    if (wc == *delim_ptr)
-      return true;
-  return false;
-}
-
 LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
-                   (wchar_t *__restrict str, const wchar_t *__restrict delim,
+                   (wchar_t *__restrict str, const wchar_t *__restrict delims,
                     wchar_t **__restrict context)) {
   if (str == nullptr) {
     if (*context == nullptr)
@@ -31,11 +24,11 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
   }
 
   wchar_t *tok_start, *tok_end;
-  for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim);
+  for (tok_start = str; *tok_start != L'\0' && wcschr(delims, *tok_start);
        ++tok_start)
     ;
 
-  for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim);
+  for (tok_end = tok_start; *tok_end != L'\0' && !wcschr(delims, *tok_end);
        ++tok_end)
     ;
 

>From bd309b55530fab119f2a05df1dbade75f048b5f9 Mon Sep 17 00:00:00 2001
From: enh-google <enh at google.com>
Date: Fri, 25 Jul 2025 13:35:01 -0400
Subject: [PATCH 2/5] [libc] add missing includes and LIBC_CRASH_ON_NULLPTR().

---
 libc/src/wchar/wcschr.cpp  | 2 ++
 libc/src/wchar/wcspbrk.cpp | 1 +
 libc/src/wchar/wcstok.cpp  | 1 +
 3 files changed, 4 insertions(+)

diff --git a/libc/src/wchar/wcschr.cpp b/libc/src/wchar/wcschr.cpp
index e53ec9a4c95ba..8ac491602a65c 100644
--- a/libc/src/wchar/wcschr.cpp
+++ b/libc/src/wchar/wcschr.cpp
@@ -11,11 +11,13 @@
 #include "hdr/types/wchar_t.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
 #include "wchar_utils.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(const wchar_t *, wcschr, (const wchar_t *s, wchar_t c)) {
+  LIBC_CRASH_ON_NULLPTR(s);
   return internal::wcschr(s, c);
 }
 
diff --git a/libc/src/wchar/wcspbrk.cpp b/libc/src/wchar/wcspbrk.cpp
index 9faa34fe5fd1a..5d86a494bdf39 100644
--- a/libc/src/wchar/wcspbrk.cpp
+++ b/libc/src/wchar/wcspbrk.cpp
@@ -11,6 +11,7 @@
 #include "hdr/types/wchar_t.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/null_check.h"
+#include "wchar_utils.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index 32a500bf302c7..472df33558e86 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -10,6 +10,7 @@
 
 #include "hdr/types/wchar_t.h"
 #include "src/__support/common.h"
+#include "wchar_utils.h"
 
 namespace LIBC_NAMESPACE_DECL {
 

>From b0492ab2b7f360ac9dd97140dc12d1904031d7ce Mon Sep 17 00:00:00 2001
From: enh-google <enh at google.com>
Date: Fri, 25 Jul 2025 13:49:28 -0400
Subject: [PATCH 3/5] [libc] Fix internal::wcschr() return type.

The exported function already uses a similar white lie for convenience, so the internal function may as well do the same rather than adding const casts.
---
 libc/src/wchar/wchar_utils.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h
index d69638fa71912..55a3cee99190f 100644
--- a/libc/src/wchar/wchar_utils.h
+++ b/libc/src/wchar/wchar_utils.h
@@ -17,7 +17,7 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
-LIBC_INLINE static wchar_t *wcschr(const wchar_t *s, wchar_t c) {
+LIBC_INLINE static const wchar_t *wcschr(const wchar_t *s, wchar_t c) {
   for (; *s && *s != c; ++s)
     ;
   return (*s == c) ? s : nullptr;
@@ -29,7 +29,7 @@ LIBC_INLINE static size_t wcsspn(const wchar_t *s1, const wchar_t *s2,
                                  bool not_match_set) {
   size_t i = 0;
   for (; s1[i]; ++i) {
-    bool in_set = wcschr(s1[i], s2);
+    bool in_set = internal::wcschr(s2, s1[i]);
     if (in_set == not_match_set)
       return i;
   }

>From 3adedaeee4f510390c7256945c46446c60950f61 Mon Sep 17 00:00:00 2001
From: enh-google <enh at google.com>
Date: Fri, 25 Jul 2025 14:22:11 -0400
Subject: [PATCH 4/5] [libc] wcstok(): explicitly say internal::wcschr().

---
 libc/src/wchar/wcstok.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index 472df33558e86..bf680dbf877ee 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -25,11 +25,11 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
   }
 
   wchar_t *tok_start, *tok_end;
-  for (tok_start = str; *tok_start != L'\0' && wcschr(delims, *tok_start);
+  for (tok_start = str; *tok_start != L'\0' && internal::wcschr(delims, *tok_start);
        ++tok_start)
     ;
 
-  for (tok_end = tok_start; *tok_end != L'\0' && !wcschr(delims, *tok_end);
+  for (tok_end = tok_start; *tok_end != L'\0' && !internal::wcschr(delims, *tok_end);
        ++tok_end)
     ;
 

>From f0698b3f55bbb1b10a1fd5875c570a0c4c5836fd Mon Sep 17 00:00:00 2001
From: enh-google <enh at google.com>
Date: Fri, 25 Jul 2025 14:52:10 -0400
Subject: [PATCH 5/5] [libc] fix wcstok() formatting.

---
 libc/src/wchar/wcstok.cpp | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index bf680dbf877ee..ed4f0aad08ea5 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -24,14 +24,13 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
     str = *context;
   }
 
-  wchar_t *tok_start, *tok_end;
-  for (tok_start = str; *tok_start != L'\0' && internal::wcschr(delims, *tok_start);
-       ++tok_start)
-    ;
-
-  for (tok_end = tok_start; *tok_end != L'\0' && !internal::wcschr(delims, *tok_end);
-       ++tok_end)
-    ;
+  wchar_t *tok_start = str;
+  while (*tok_start != L'\0' && internal::wcschr(delims, *tok_start))
+    ++tok_start;
+
+  wchar_t *tok_end = tok_start;
+  while (*tok_end != L'\0' && !internal::wcschr(delims, *tok_end))
+    ++tok_end;
 
   if (*tok_end != L'\0') {
     *tok_end = L'\0';



More information about the libc-commits mailing list