[libc-commits] [libc] [libc] wcstok implementation (PR #145989)

Uzair Nawaz via libc-commits libc-commits at lists.llvm.org
Mon Jun 30 10:06:49 PDT 2025


https://github.com/uzairnawaz updated https://github.com/llvm/llvm-project/pull/145989

>From a484c4b859cc92c1912072a3e5167e708a90d093 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 25 Jun 2025 23:27:07 +0000
Subject: [PATCH 1/5] set up build files

---
 libc/config/linux/x86_64/entrypoints.txt |  1 +
 libc/include/wchar.yaml                  |  8 ++++++++
 libc/src/wchar/CMakeLists.txt            | 10 ++++++++++
 libc/src/wchar/wcstok.cpp                | 22 ++++++++++++++++++++++
 libc/src/wchar/wcstok.h                  | 22 ++++++++++++++++++++++
 5 files changed, 63 insertions(+)
 create mode 100644 libc/src/wchar/wcstok.cpp
 create mode 100644 libc/src/wchar/wcstok.h

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 6b3fc9485ec1a..bf04ae2e83fb3 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -386,6 +386,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.wchar.wmemchr
     libc.src.wchar.wcpcpy
     libc.src.wchar.wcpncpy
+    libc.src.wchar.wcstok
 
     # sys/uio.h entrypoints
     libc.src.sys.uio.writev
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 397296894829d..15025f42c0723 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -189,6 +189,14 @@ functions:
     arguments:
       - type: wchar_t *__restrict
       - type: const wchar_t *__restrict
+  - name: wcstok
+    standards:
+      - stdc
+    return_type: wchar_t *
+    arguments:
+      - type: wchar_t *__restrict
+      - type: const wchar_t *__restrict
+      - type: wchar_t **__restrict
   - name: wcpcpy
     standards:
       - stdc
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 16664100d42c7..6d93b82b2d2bf 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -34,6 +34,16 @@ add_entrypoint_object(
     libc.src.__support.wctype_utils
 )
 
+add_entrypoint_object(
+  wcstok
+  SRCS
+    wcstok.cpp
+  HDRS
+    wcstok.h
+  DEPENDS
+    libc.hdr.types.wchar_t
+)
+
 add_entrypoint_object(
   wcrtomb
   SRCS
diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
new file mode 100644
index 0000000000000..dc004c0a7af37
--- /dev/null
+++ b/libc/src/wchar/wcstok.cpp
@@ -0,0 +1,22 @@
+//===-- Implementation of wcstok ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcstok.h"
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
+                   (wchar_t *__restrict str, const wchar_t *__restrict delim,
+                    wchar_t **__restrict ptr)) {
+  
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcstok.h b/libc/src/wchar/wcstok.h
new file mode 100644
index 0000000000000..9f41ea37a947a
--- /dev/null
+++ b/libc/src/wchar/wcstok.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for wcstok ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOK_H
+#define LLVM_LIBC_SRC_WCHAR_WCSTOK_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+wchar_t *wcstok(wchar_t *__restrict str, const wchar_t *__restrict delim,
+                wchar_t **__restrict ptr);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSTOK_H

>From 930e37e5de03c3879d5794b19219e211a7d8a469 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 26 Jun 2025 21:29:47 +0000
Subject: [PATCH 2/5] started impl

---
 libc/src/wchar/wcstok.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index dc004c0a7af37..c6ad51af25978 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -16,7 +16,15 @@ namespace LIBC_NAMESPACE_DECL {
 LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
                    (wchar_t *__restrict str, const wchar_t *__restrict delim,
                     wchar_t **__restrict ptr)) {
-  
+    if (str == nullptr)
+        str = *ptr;
+    
+    while (*str != L'\0') {
+        bool inDelim = false;
+        for (const wchar_t* delim_ptr = delim; delim_ptr != L'\0'; delim_ptr++) {
+            
+        }
+    }
 }
 
 } // namespace LIBC_NAMESPACE_DECL

>From 17f33604af3b7680dabad1fc8a94c2ff88237f09 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 26 Jun 2025 23:13:46 +0000
Subject: [PATCH 3/5] tests

---
 libc/src/wchar/wcstok.cpp           |  32 ++++--
 libc/test/src/wchar/CMakeLists.txt  |  10 ++
 libc/test/src/wchar/wcstok_test.cpp | 145 ++++++++++++++++++++++++++++
 3 files changed, 179 insertions(+), 8 deletions(-)
 create mode 100644 libc/test/src/wchar/wcstok_test.cpp

diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index c6ad51af25978..a02332ac1fbbd 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -16,15 +16,31 @@ namespace LIBC_NAMESPACE_DECL {
 LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
                    (wchar_t *__restrict str, const wchar_t *__restrict delim,
                     wchar_t **__restrict ptr)) {
-    if (str == nullptr)
-        str = *ptr;
-    
-    while (*str != L'\0') {
-        bool inDelim = false;
-        for (const wchar_t* delim_ptr = delim; delim_ptr != L'\0'; delim_ptr++) {
-            
-        }
+  if (str == nullptr)
+    str = *ptr;
+
+  bool foundTokenStart = false;
+  wchar_t *out = nullptr;
+  wchar_t *str_ptr;
+  for (str_ptr = str; *str_ptr != L'\0'; str_ptr++) {
+    bool inDelim = false;
+    for (const wchar_t *delim_ptr = delim; *delim_ptr != L'\0' && !inDelim;
+         delim_ptr++)
+      if (*str_ptr == *delim_ptr)
+        inDelim = true;
+
+    if (!inDelim && !foundTokenStart) {
+      foundTokenStart = true;
+      out = str_ptr;
+    } else if (inDelim && foundTokenStart) {
+      *str_ptr = L'\0';
+      *ptr = str_ptr + 1;
+      return out;
     }
+  }
+
+  *ptr = str_ptr;
+  return out;
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index bf16fdd7f8c4d..8967cc1e8d353 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -111,6 +111,16 @@ add_libc_test(
     libc.src.wchar.wcschr
 )
 
+add_libc_test(
+  wcstok_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcstok_test.cpp
+  DEPENDS
+    libc.src.wchar.wcstok
+)
+
 add_libc_test(
   wcsncmp_test
   SUITE
diff --git a/libc/test/src/wchar/wcstok_test.cpp b/libc/test/src/wchar/wcstok_test.cpp
new file mode 100644
index 0000000000000..79153ccc0adad
--- /dev/null
+++ b/libc/test/src/wchar/wcstok_test.cpp
@@ -0,0 +1,145 @@
+//===-- Unittests for wcstok ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcstok.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcStrTokTest, NoTokenFound) {
+  wchar_t empty[] = L"";
+  wchar_t *buf;
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &buf), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &buf), nullptr);
+
+  wchar_t single[] = L"_";
+  wchar_t *token = LIBC_NAMESPACE::wcstok(single, L"", &buf);
+  ASSERT_TRUE(token[0] == L'_');
+  ASSERT_TRUE(token[1] == L'\0');
+
+  wchar_t multiple[] = L"1,2";
+  token = LIBC_NAMESPACE::wcstok(multiple, L":", &buf);
+  ASSERT_TRUE(multiple[0] == L'1');
+  ASSERT_TRUE(multiple[1] == L',');
+  ASSERT_TRUE(multiple[2] == L'2');
+  ASSERT_TRUE(multiple[3] == L'\0');
+}
+
+TEST(LlvmLibcStrTokTest, DelimiterAsFirstCharacterShouldBeIgnored) {
+  wchar_t *buf;
+  wchar_t src[] = L".123";
+  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L".", &buf);
+  ASSERT_TRUE(token[0] == L'1');
+  ASSERT_TRUE(token[1] == L'2');
+  ASSERT_TRUE(token[2] == L'3');
+  ASSERT_TRUE(token[3] == L'\0');
+}
+
+TEST(LlvmLibcStrTokTest, DelimiterIsMiddleCharacter) {
+  wchar_t src[] = L"12,34";
+  wchar_t *buf;
+  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",", &buf);
+  ASSERT_TRUE(token[0] == L'1');
+  ASSERT_TRUE(token[1] == L'2');
+  ASSERT_TRUE(token[2] == L'\0');
+}
+
+TEST(LlvmLibcStrTokTest, DelimiterAsLastCharacterShouldBeIgnored) {
+  wchar_t src[] = L"1234:";
+  wchar_t *buf;
+  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L":", &buf);
+  ASSERT_TRUE(token[0] == L'1');
+  ASSERT_TRUE(token[1] == L'2');
+  ASSERT_TRUE(token[2] == L'3');
+  ASSERT_TRUE(token[3] == L'4');
+  ASSERT_TRUE(token[4] == L'\0');
+}
+
+TEST(LlvmLibcStrTokTest, MultipleDelimiters) {
+  wchar_t src[] = L"12,.34";
+  wchar_t *buf;
+  wchar_t *token;
+
+  token = LIBC_NAMESPACE::wcstok(src, L".", &buf);
+  ASSERT_TRUE(token[0] == L'1');
+  ASSERT_TRUE(token[1] == L'2');
+  ASSERT_TRUE(token[2] == L',');
+  ASSERT_TRUE(token[3] == L'\0');
+
+  token = LIBC_NAMESPACE::wcstok(src, L".,", &buf);
+  ASSERT_TRUE(token[0] == L'1');
+  ASSERT_TRUE(token[1] == L'2');
+  ASSERT_TRUE(token[2] == L'\0');
+
+  token = LIBC_NAMESPACE::wcstok(src, L",.", &buf);
+  ASSERT_TRUE(token[0] == L'1');
+  ASSERT_TRUE(token[1] == L'2');
+  ASSERT_TRUE(token[2] == L'\0');
+
+  token = LIBC_NAMESPACE::wcstok(src, L":,.", &buf);
+  ASSERT_TRUE(token[0] == L'1');
+  ASSERT_TRUE(token[1] == L'2');
+  ASSERT_TRUE(token[2] == L'\0');
+}
+
+TEST(LlvmLibcStrTokTest, ShouldNotGoPastNullTerminator) {
+  wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'};
+  wchar_t *buf;
+  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",", &buf);
+  ASSERT_TRUE(token[0] == L'1');
+  ASSERT_TRUE(token[1] == L'2');
+  ASSERT_TRUE(token[2] == L'\0');
+}
+
+TEST(LlvmLibcStrTokTest, SubsequentCallsShouldFindFollowingDelimiters) {
+  wchar_t src[] = L"12,34.56";
+  wchar_t *buf;
+  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &buf);
+  ASSERT_TRUE(token[0] == L'1');
+  ASSERT_TRUE(token[1] == L'2');
+  ASSERT_TRUE(token[2] == L'\0');
+
+  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &buf);
+  ASSERT_TRUE(token[0] == L'3');
+  ASSERT_TRUE(token[1] == L'4');
+  ASSERT_TRUE(token[2] == L'\0');
+
+  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &buf);
+  ASSERT_TRUE(token[0] == L'5');
+  ASSERT_TRUE(token[1] == L'6');
+  ASSERT_TRUE(token[2] == L'\0');
+
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
+  ASSERT_EQ(token, nullptr);
+  // Subsequent calls after hitting the end of the string should also return
+  // nullptr.
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
+  ASSERT_EQ(token, nullptr);
+}
+
+TEST(LlvmLibcStrTokTest, DelimitersShouldNotBeIncludedInToken) {
+  wchar_t *buf;
+  wchar_t src[] = L"__ab__:_cd__:__ef__:__";
+  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &buf);
+  ASSERT_TRUE(token[0] == L'a');
+  ASSERT_TRUE(token[1] == L'b');
+  ASSERT_TRUE(token[2] == L'\0');
+
+  token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &buf);
+  ASSERT_TRUE(token[0] == L'c');
+  ASSERT_TRUE(token[1] == L'd');
+  ASSERT_TRUE(token[2] == L'\0');
+
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &buf);
+  ASSERT_TRUE(token[0] == L'e');
+  ASSERT_TRUE(token[1] == L'f');
+  ASSERT_TRUE(token[2] == L'\0');
+
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
+  ASSERT_EQ(token, nullptr);
+}

>From a562d1c5c9ddb8b632b96c780134e05eced6877d Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 27 Jun 2025 16:27:39 +0000
Subject: [PATCH 4/5] refactored and replaced tests to mimic strtok_r

---
 libc/src/wchar/wcstok.cpp           |  52 +++----
 libc/src/wchar/wcstok.h             |   2 +-
 libc/test/src/wchar/wcstok_test.cpp | 204 ++++++++++++++++------------
 3 files changed, 149 insertions(+), 109 deletions(-)

diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index a02332ac1fbbd..b34f6613b732f 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -13,34 +13,38 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
+bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
+  for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; delim_ptr++)
+    if (wc == *delim_ptr)
+      return true;
+  return false;
+}
+
 LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
                    (wchar_t *__restrict str, const wchar_t *__restrict delim,
-                    wchar_t **__restrict ptr)) {
-  if (str == nullptr)
-    str = *ptr;
-
-  bool foundTokenStart = false;
-  wchar_t *out = nullptr;
-  wchar_t *str_ptr;
-  for (str_ptr = str; *str_ptr != L'\0'; str_ptr++) {
-    bool inDelim = false;
-    for (const wchar_t *delim_ptr = delim; *delim_ptr != L'\0' && !inDelim;
-         delim_ptr++)
-      if (*str_ptr == *delim_ptr)
-        inDelim = true;
-
-    if (!inDelim && !foundTokenStart) {
-      foundTokenStart = true;
-      out = str_ptr;
-    } else if (inDelim && foundTokenStart) {
-      *str_ptr = L'\0';
-      *ptr = str_ptr + 1;
-      return out;
-    }
+                    wchar_t **__restrict context)) {
+  if (str == nullptr) {
+    if (*context == nullptr)
+      return nullptr;
+
+    str = *context;
   }
 
-  *ptr = str_ptr;
-  return out;
+  wchar_t *tok_start, *tok_end;
+  for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim);
+       tok_start++)
+    ;
+
+  for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim);
+       tok_end++)
+    ;
+
+  if (*tok_end != L'\0') {
+    *tok_end = L'\0';
+    tok_end++;
+  }
+  *context = tok_end;
+  return *tok_start == L'\0' ? nullptr : tok_start;
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcstok.h b/libc/src/wchar/wcstok.h
index 9f41ea37a947a..5e673ff4e89b9 100644
--- a/libc/src/wchar/wcstok.h
+++ b/libc/src/wchar/wcstok.h
@@ -15,7 +15,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 wchar_t *wcstok(wchar_t *__restrict str, const wchar_t *__restrict delim,
-                wchar_t **__restrict ptr);
+                wchar_t **__restrict context);
 
 } // namespace LIBC_NAMESPACE_DECL
 
diff --git a/libc/test/src/wchar/wcstok_test.cpp b/libc/test/src/wchar/wcstok_test.cpp
index 79153ccc0adad..02b5b49d55ce3 100644
--- a/libc/test/src/wchar/wcstok_test.cpp
+++ b/libc/test/src/wchar/wcstok_test.cpp
@@ -11,135 +11,171 @@
 #include "src/wchar/wcstok.h"
 #include "test/UnitTest/Test.h"
 
-TEST(LlvmLibcStrTokTest, NoTokenFound) {
-  wchar_t empty[] = L"";
-  wchar_t *buf;
-  ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &buf), nullptr);
-  ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &buf), nullptr);
-
-  wchar_t single[] = L"_";
-  wchar_t *token = LIBC_NAMESPACE::wcstok(single, L"", &buf);
-  ASSERT_TRUE(token[0] == L'_');
-  ASSERT_TRUE(token[1] == L'\0');
-
-  wchar_t multiple[] = L"1,2";
-  token = LIBC_NAMESPACE::wcstok(multiple, L":", &buf);
-  ASSERT_TRUE(multiple[0] == L'1');
-  ASSERT_TRUE(multiple[1] == L',');
-  ASSERT_TRUE(multiple[2] == L'2');
-  ASSERT_TRUE(multiple[3] == L'\0');
+TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
+  { // Empty source and delimiter string.
+    wchar_t empty[] = L"";
+    wchar_t *reserve = nullptr;
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
+    // Another call to ensure that 'reserve' is not in a bad state.
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr);
+  }
+  { // Empty source and single character delimiter string.
+    wchar_t empty[] = L"";
+    wchar_t *reserve = nullptr;
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
+    // Another call to ensure that 'reserve' is not in a bad state.
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
+  }
+  { // Same wchar_tacter source and delimiter string.
+    wchar_t single[] = L"_";
+    wchar_t *reserve = nullptr;
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
+    // Another call to ensure that 'reserve' is not in a bad state.
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
+  }
+  { // Multiple wchar_tacter source and single wchar_tacter delimiter string.
+    wchar_t multiple[] = L"1,2";
+    wchar_t *reserve = nullptr;
+    wchar_t *tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
+    ASSERT_TRUE(tok[0] == L'1');
+    ASSERT_TRUE(tok[1] == L',');
+    ASSERT_TRUE(tok[2] == L'2');
+    ASSERT_TRUE(tok[3] == L'\0');
+    // Another call to ensure that 'reserve' is not in a bad state.
+    tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
+    ASSERT_TRUE(tok[0] == L'1');
+    ASSERT_TRUE(tok[1] == L',');
+    ASSERT_TRUE(tok[2] == L'2');
+    ASSERT_TRUE(tok[3] == L'\0');
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
+  }
 }
 
-TEST(LlvmLibcStrTokTest, DelimiterAsFirstCharacterShouldBeIgnored) {
-  wchar_t *buf;
+TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsFirstCharacterShouldBeIgnored) {
   wchar_t src[] = L".123";
-  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L".", &buf);
-  ASSERT_TRUE(token[0] == L'1');
-  ASSERT_TRUE(token[1] == L'2');
-  ASSERT_TRUE(token[2] == L'3');
-  ASSERT_TRUE(token[3] == L'\0');
+  wchar_t *reserve = nullptr;
+  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'3');
+  ASSERT_TRUE(tok[3] == L'\0');
+  // Another call to ensure that 'reserve' is not in a bad state.
+  tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'3');
+  ASSERT_TRUE(tok[3] == L'\0');
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L".", &reserve), nullptr);
 }
 
-TEST(LlvmLibcStrTokTest, DelimiterIsMiddleCharacter) {
+TEST(LlvmLibcWCSTokReentrantTest, DelimiterIsMiddleCharacter) {
   wchar_t src[] = L"12,34";
-  wchar_t *buf;
-  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",", &buf);
-  ASSERT_TRUE(token[0] == L'1');
-  ASSERT_TRUE(token[1] == L'2');
-  ASSERT_TRUE(token[2] == L'\0');
+  wchar_t *reserve = nullptr;
+  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'\0');
+  // Another call to ensure that 'reserve' is not in a bad state.
+  tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'\0');
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
 }
 
-TEST(LlvmLibcStrTokTest, DelimiterAsLastCharacterShouldBeIgnored) {
+TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsLastCharacterShouldBeIgnored) {
   wchar_t src[] = L"1234:";
-  wchar_t *buf;
-  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L":", &buf);
-  ASSERT_TRUE(token[0] == L'1');
-  ASSERT_TRUE(token[1] == L'2');
-  ASSERT_TRUE(token[2] == L'3');
-  ASSERT_TRUE(token[3] == L'4');
-  ASSERT_TRUE(token[4] == L'\0');
+  wchar_t *reserve = nullptr;
+  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'3');
+  ASSERT_TRUE(tok[3] == L'4');
+  ASSERT_TRUE(tok[4] == L'\0');
+  // Another call to ensure that 'reserve' is not in a bad state.
+  tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'3');
+  ASSERT_TRUE(tok[3] == L'4');
+  ASSERT_TRUE(tok[4] == L'\0');
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
 }
 
-TEST(LlvmLibcStrTokTest, MultipleDelimiters) {
-  wchar_t src[] = L"12,.34";
-  wchar_t *buf;
-  wchar_t *token;
-
-  token = LIBC_NAMESPACE::wcstok(src, L".", &buf);
-  ASSERT_TRUE(token[0] == L'1');
-  ASSERT_TRUE(token[1] == L'2');
-  ASSERT_TRUE(token[2] == L',');
-  ASSERT_TRUE(token[3] == L'\0');
-
-  token = LIBC_NAMESPACE::wcstok(src, L".,", &buf);
-  ASSERT_TRUE(token[0] == L'1');
-  ASSERT_TRUE(token[1] == L'2');
-  ASSERT_TRUE(token[2] == L'\0');
-
-  token = LIBC_NAMESPACE::wcstok(src, L",.", &buf);
-  ASSERT_TRUE(token[0] == L'1');
-  ASSERT_TRUE(token[1] == L'2');
-  ASSERT_TRUE(token[2] == L'\0');
-
-  token = LIBC_NAMESPACE::wcstok(src, L":,.", &buf);
-  ASSERT_TRUE(token[0] == L'1');
-  ASSERT_TRUE(token[1] == L'2');
-  ASSERT_TRUE(token[2] == L'\0');
+TEST(LlvmLibcWCSTokReentrantTest, ShouldNotGoPastNullTerminator) {
+  wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'};
+  wchar_t *reserve = nullptr;
+  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'\0');
+  // Another call to ensure that 'reserve' is not in a bad state.
+  tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'\0');
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
 }
 
-TEST(LlvmLibcStrTokTest, ShouldNotGoPastNullTerminator) {
-  wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'};
-  wchar_t *buf;
-  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",", &buf);
-  ASSERT_TRUE(token[0] == L'1');
-  ASSERT_TRUE(token[1] == L'2');
-  ASSERT_TRUE(token[2] == L'\0');
+TEST(LlvmLibcWCSTokReentrantTest,
+     ShouldReturnNullptrWhenBothSrcAndSaveptrAreNull) {
+  wchar_t *src = nullptr;
+  wchar_t *reserve = nullptr;
+  // Ensure that nullptr is returned instead of crashing when both src and
+  // reserve are null.
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(src, L",", &reserve), nullptr);
+  // And that neither src nor reserve is changed when that happens.
+  ASSERT_EQ(src, nullptr);
+  ASSERT_EQ(reserve, nullptr);
 }
 
-TEST(LlvmLibcStrTokTest, SubsequentCallsShouldFindFollowingDelimiters) {
+TEST(LlvmLibcWCSTokReentrantTest,
+     SubsequentCallsShouldFindFollowingDelimiters) {
   wchar_t src[] = L"12,34.56";
-  wchar_t *buf;
-  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &buf);
+  wchar_t *reserve = nullptr;
+  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &reserve);
   ASSERT_TRUE(token[0] == L'1');
   ASSERT_TRUE(token[1] == L'2');
   ASSERT_TRUE(token[2] == L'\0');
 
-  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &buf);
+  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
   ASSERT_TRUE(token[0] == L'3');
   ASSERT_TRUE(token[1] == L'4');
   ASSERT_TRUE(token[2] == L'\0');
 
-  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &buf);
+  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
   ASSERT_TRUE(token[0] == L'5');
   ASSERT_TRUE(token[1] == L'6');
   ASSERT_TRUE(token[2] == L'\0');
-
-  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
   ASSERT_EQ(token, nullptr);
   // Subsequent calls after hitting the end of the string should also return
   // nullptr.
-  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
   ASSERT_EQ(token, nullptr);
 }
 
-TEST(LlvmLibcStrTokTest, DelimitersShouldNotBeIncludedInToken) {
-  wchar_t *buf;
+TEST(LlvmLibcWCSTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
   wchar_t src[] = L"__ab__:_cd__:__ef__:__";
-  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &buf);
+  wchar_t *reserve = nullptr;
+  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &reserve);
   ASSERT_TRUE(token[0] == L'a');
   ASSERT_TRUE(token[1] == L'b');
   ASSERT_TRUE(token[2] == L'\0');
 
-  token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &buf);
+  token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &reserve);
   ASSERT_TRUE(token[0] == L'c');
   ASSERT_TRUE(token[1] == L'd');
   ASSERT_TRUE(token[2] == L'\0');
 
-  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &buf);
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &reserve);
   ASSERT_TRUE(token[0] == L'e');
   ASSERT_TRUE(token[1] == L'f');
   ASSERT_TRUE(token[2] == L'\0');
 
-  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
   ASSERT_EQ(token, nullptr);
 }

>From 15f0166593dbe7193af95e675b7c7950285cb2aa Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Mon, 30 Jun 2025 17:06:18 +0000
Subject: [PATCH 5/5] formatting

---
 libc/src/wchar/wcstok.cpp           | 8 ++++----
 libc/test/src/wchar/wcstok_test.cpp | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index b34f6613b732f..291efc15e158a 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -14,7 +14,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
-  for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; delim_ptr++)
+  for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr)
     if (wc == *delim_ptr)
       return true;
   return false;
@@ -32,16 +32,16 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
 
   wchar_t *tok_start, *tok_end;
   for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim);
-       tok_start++)
+       ++tok_start)
     ;
 
   for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim);
-       tok_end++)
+       ++tok_end)
     ;
 
   if (*tok_end != L'\0') {
     *tok_end = L'\0';
-    tok_end++;
+    ++tok_end;
   }
   *context = tok_end;
   return *tok_start == L'\0' ? nullptr : tok_start;
diff --git a/libc/test/src/wchar/wcstok_test.cpp b/libc/test/src/wchar/wcstok_test.cpp
index 02b5b49d55ce3..7106e9f2fab5e 100644
--- a/libc/test/src/wchar/wcstok_test.cpp
+++ b/libc/test/src/wchar/wcstok_test.cpp
@@ -28,7 +28,7 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
     ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
     ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
   }
-  { // Same wchar_tacter source and delimiter string.
+  { // Same character source and delimiter string.
     wchar_t single[] = L"_";
     wchar_t *reserve = nullptr;
     ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
@@ -36,7 +36,7 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
     ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
     ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
   }
-  { // Multiple wchar_tacter source and single wchar_tacter delimiter string.
+  { // Multiple character source and single character delimiter string.
     wchar_t multiple[] = L"1,2";
     wchar_t *reserve = nullptr;
     wchar_t *tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);



More information about the libc-commits mailing list