[libc-commits] [libc] b36f05c - [libc] Fix wcstok() "subsequent searches" behavior. (#151589)

via libc-commits libc-commits at lists.llvm.org
Fri Aug 1 11:15:38 PDT 2025


Author: enh-google
Date: 2025-08-01T14:15:34-04:00
New Revision: b36f05ce48bd714182481765ac14fcaff129639f

URL: https://github.com/llvm/llvm-project/commit/b36f05ce48bd714182481765ac14fcaff129639f
DIFF: https://github.com/llvm/llvm-project/commit/b36f05ce48bd714182481765ac14fcaff129639f.diff

LOG: [libc] Fix wcstok() "subsequent searches" behavior. (#151589)

POSIX says "If no such wide-character code is found, the current token
extends to the end of the wide-character string pointed to by ws1, and
subsequent searches for a token shall return a null pointer", but the
current implementation returns nullptr only the first time. This caused
an existing bionic test to fail when I tried to switch over to llvm-libc
wcstok().

Added: 
    

Modified: 
    libc/src/wchar/wcstok.cpp
    libc/test/src/wchar/wcstok_test.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index ed4f0aad08ea5..85513a6ecfb93 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -27,17 +27,22 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
   wchar_t *tok_start = str;
   while (*tok_start != L'\0' && internal::wcschr(delims, *tok_start))
     ++tok_start;
+  if (*tok_start == L'\0') {
+    *context = nullptr;
+    return nullptr;
+  }
 
   wchar_t *tok_end = tok_start;
   while (*tok_end != L'\0' && !internal::wcschr(delims, *tok_end))
     ++tok_end;
 
-  if (*tok_end != L'\0') {
+  if (*tok_end == L'\0') {
+    *context = nullptr;
+  } else {
     *tok_end = L'\0';
-    ++tok_end;
+    *context = tok_end + 1;
   }
-  *context = tok_end;
-  return *tok_start == L'\0' ? nullptr : tok_start;
+  return tok_start;
 }
 
 } // namespace LIBC_NAMESPACE_DECL

diff  --git a/libc/test/src/wchar/wcstok_test.cpp b/libc/test/src/wchar/wcstok_test.cpp
index 7106e9f2fab5e..3bb1014aff3ab 100644
--- a/libc/test/src/wchar/wcstok_test.cpp
+++ b/libc/test/src/wchar/wcstok_test.cpp
@@ -19,6 +19,8 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
     // Another call to ensure that 'reserve' is not in a bad state.
     ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
     ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr);
+    // Subsequent searches still return nullptr.
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr);
   }
   { // Empty source and single character delimiter string.
     wchar_t empty[] = L"";
@@ -27,6 +29,8 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
     // Another call to ensure that 'reserve' is not in a bad state.
     ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
     ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
+    // Subsequent searches still return nullptr.
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
   }
   { // Same character source and delimiter string.
     wchar_t single[] = L"_";
@@ -35,6 +39,8 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
     // Another call to ensure that 'reserve' is not in a bad state.
     ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
     ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
+    // Subsequent searches still return nullptr.
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
   }
   { // Multiple character source and single character delimiter string.
     wchar_t multiple[] = L"1,2";
@@ -51,6 +57,8 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
     ASSERT_TRUE(tok[2] == L'2');
     ASSERT_TRUE(tok[3] == L'\0');
     ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
+    // Subsequent searches still return nullptr.
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
   }
 }
 


        


More information about the libc-commits mailing list