[libc-commits] [libc] b36f05c - [libc] Fix wcstok() "subsequent searches" behavior. (#151589)
via libc-commits
libc-commits at lists.llvm.org
Fri Aug 1 11:15:38 PDT 2025
Author: enh-google
Date: 2025-08-01T14:15:34-04:00
New Revision: b36f05ce48bd714182481765ac14fcaff129639f
URL: https://github.com/llvm/llvm-project/commit/b36f05ce48bd714182481765ac14fcaff129639f
DIFF: https://github.com/llvm/llvm-project/commit/b36f05ce48bd714182481765ac14fcaff129639f.diff
LOG: [libc] Fix wcstok() "subsequent searches" behavior. (#151589)
POSIX says "If no such wide-character code is found, the current token
extends to the end of the wide-character string pointed to by ws1, and
subsequent searches for a token shall return a null pointer", but the
current implementation only returns nullptr the first time. This failed
an existing bionic test when I tried to switch over to llvm-libc
wcstok().
Added:
Modified:
libc/src/wchar/wcstok.cpp
libc/test/src/wchar/wcstok_test.cpp
Removed:
################################################################################
diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index ed4f0aad08ea5..85513a6ecfb93 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -27,17 +27,22 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
wchar_t *tok_start = str;
while (*tok_start != L'\0' && internal::wcschr(delims, *tok_start))
++tok_start;
+ if (*tok_start == L'\0') {
+ *context = nullptr;
+ return nullptr;
+ }
wchar_t *tok_end = tok_start;
while (*tok_end != L'\0' && !internal::wcschr(delims, *tok_end))
++tok_end;
- if (*tok_end != L'\0') {
+ if (*tok_end == L'\0') {
+ *context = nullptr;
+ } else {
*tok_end = L'\0';
- ++tok_end;
+ *context = tok_end + 1;
}
- *context = tok_end;
- return *tok_start == L'\0' ? nullptr : tok_start;
+ return tok_start;
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/wchar/wcstok_test.cpp b/libc/test/src/wchar/wcstok_test.cpp
index 7106e9f2fab5e..3bb1014aff3ab 100644
--- a/libc/test/src/wchar/wcstok_test.cpp
+++ b/libc/test/src/wchar/wcstok_test.cpp
@@ -19,6 +19,8 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
// Another call to ensure that 'reserve' is not in a bad state.
ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr);
+ // Subsequent searches still return nullptr.
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr);
}
{ // Empty source and single character delimiter string.
wchar_t empty[] = L"";
@@ -27,6 +29,8 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
// Another call to ensure that 'reserve' is not in a bad state.
ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
+ // Subsequent searches still return nullptr.
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
}
{ // Same character source and delimiter string.
wchar_t single[] = L"_";
@@ -35,6 +39,8 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
// Another call to ensure that 'reserve' is not in a bad state.
ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
+ // Subsequent searches still return nullptr.
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
}
{ // Multiple character source and single character delimiter string.
wchar_t multiple[] = L"1,2";
@@ -51,6 +57,8 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
ASSERT_TRUE(tok[2] == L'2');
ASSERT_TRUE(tok[3] == L'\0');
ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
+ // Subsequent searches still return nullptr.
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
}
}
More information about the libc-commits mailing list