[libc-commits] [libc] [libc] wcstok implementation (PR #145989)
Uzair Nawaz via libc-commits
libc-commits at lists.llvm.org
Mon Jun 30 10:06:49 PDT 2025
https://github.com/uzairnawaz updated https://github.com/llvm/llvm-project/pull/145989
>From a484c4b859cc92c1912072a3e5167e708a90d093 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Wed, 25 Jun 2025 23:27:07 +0000
Subject: [PATCH 1/5] set up build files
---
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/include/wchar.yaml | 8 ++++++++
libc/src/wchar/CMakeLists.txt | 10 ++++++++++
libc/src/wchar/wcstok.cpp | 22 ++++++++++++++++++++++
libc/src/wchar/wcstok.h | 22 ++++++++++++++++++++++
5 files changed, 63 insertions(+)
create mode 100644 libc/src/wchar/wcstok.cpp
create mode 100644 libc/src/wchar/wcstok.h
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 6b3fc9485ec1a..bf04ae2e83fb3 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -386,6 +386,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.wchar.wmemchr
libc.src.wchar.wcpcpy
libc.src.wchar.wcpncpy
+ libc.src.wchar.wcstok
# sys/uio.h entrypoints
libc.src.sys.uio.writev
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 397296894829d..15025f42c0723 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -189,6 +189,14 @@ functions:
arguments:
- type: wchar_t *__restrict
- type: const wchar_t *__restrict
+ - name: wcstok
+ standards:
+ - stdc
+ return_type: wchar_t *
+ arguments:
+ - type: wchar_t *__restrict
+ - type: const wchar_t *__restrict
+ - type: wchar_t **__restrict
- name: wcpcpy
standards:
- stdc
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 16664100d42c7..6d93b82b2d2bf 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -34,6 +34,16 @@ add_entrypoint_object(
libc.src.__support.wctype_utils
)
+add_entrypoint_object(
+ wcstok
+ SRCS
+ wcstok.cpp
+ HDRS
+ wcstok.h
+ DEPENDS
+ libc.hdr.types.wchar_t
+)
+
add_entrypoint_object(
wcrtomb
SRCS
diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
new file mode 100644
index 0000000000000..dc004c0a7af37
--- /dev/null
+++ b/libc/src/wchar/wcstok.cpp
@@ -0,0 +1,22 @@
+//===-- Implementation of wcstok ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcstok.h"
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
+ (wchar_t *__restrict str, const wchar_t *__restrict delim,
+ wchar_t **__restrict ptr)) {
+
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcstok.h b/libc/src/wchar/wcstok.h
new file mode 100644
index 0000000000000..9f41ea37a947a
--- /dev/null
+++ b/libc/src/wchar/wcstok.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for wcstok ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOK_H
+#define LLVM_LIBC_SRC_WCHAR_WCSTOK_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+wchar_t *wcstok(wchar_t *__restrict str, const wchar_t *__restrict delim,
+ wchar_t **__restrict ptr);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSTOK_H
>From 930e37e5de03c3879d5794b19219e211a7d8a469 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 26 Jun 2025 21:29:47 +0000
Subject: [PATCH 2/5] started impl
---
libc/src/wchar/wcstok.cpp | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index dc004c0a7af37..c6ad51af25978 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -16,7 +16,15 @@ namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
(wchar_t *__restrict str, const wchar_t *__restrict delim,
wchar_t **__restrict ptr)) {
-
+ if (str == nullptr)
+ str = *ptr;
+
+ while (*str != L'\0') {
+ bool inDelim = false;
+ for (const wchar_t* delim_ptr = delim; delim_ptr != L'\0'; delim_ptr++) {
+
+ }
+ }
}
} // namespace LIBC_NAMESPACE_DECL
>From 17f33604af3b7680dabad1fc8a94c2ff88237f09 Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Thu, 26 Jun 2025 23:13:46 +0000
Subject: [PATCH 3/5] tests
---
libc/src/wchar/wcstok.cpp | 32 ++++--
libc/test/src/wchar/CMakeLists.txt | 10 ++
libc/test/src/wchar/wcstok_test.cpp | 145 ++++++++++++++++++++++++++++
3 files changed, 179 insertions(+), 8 deletions(-)
create mode 100644 libc/test/src/wchar/wcstok_test.cpp
diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index c6ad51af25978..a02332ac1fbbd 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -16,15 +16,31 @@ namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
(wchar_t *__restrict str, const wchar_t *__restrict delim,
wchar_t **__restrict ptr)) {
- if (str == nullptr)
- str = *ptr;
-
- while (*str != L'\0') {
- bool inDelim = false;
- for (const wchar_t* delim_ptr = delim; delim_ptr != L'\0'; delim_ptr++) {
-
- }
+ if (str == nullptr)
+ str = *ptr;
+
+ bool foundTokenStart = false;
+ wchar_t *out = nullptr;
+ wchar_t *str_ptr;
+ for (str_ptr = str; *str_ptr != L'\0'; str_ptr++) {
+ bool inDelim = false;
+ for (const wchar_t *delim_ptr = delim; *delim_ptr != L'\0' && !inDelim;
+ delim_ptr++)
+ if (*str_ptr == *delim_ptr)
+ inDelim = true;
+
+ if (!inDelim && !foundTokenStart) {
+ foundTokenStart = true;
+ out = str_ptr;
+ } else if (inDelim && foundTokenStart) {
+ *str_ptr = L'\0';
+ *ptr = str_ptr + 1;
+ return out;
}
+ }
+
+ *ptr = str_ptr;
+ return out;
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index bf16fdd7f8c4d..8967cc1e8d353 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -111,6 +111,16 @@ add_libc_test(
libc.src.wchar.wcschr
)
+add_libc_test(
+ wcstok_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcstok_test.cpp
+ DEPENDS
+ libc.src.wchar.wcstok
+)
+
add_libc_test(
wcsncmp_test
SUITE
diff --git a/libc/test/src/wchar/wcstok_test.cpp b/libc/test/src/wchar/wcstok_test.cpp
new file mode 100644
index 0000000000000..79153ccc0adad
--- /dev/null
+++ b/libc/test/src/wchar/wcstok_test.cpp
@@ -0,0 +1,145 @@
+//===-- Unittests for wcstok ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcstok.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcStrTokTest, NoTokenFound) {
+ wchar_t empty[] = L"";
+ wchar_t *buf;
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &buf), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &buf), nullptr);
+
+ wchar_t single[] = L"_";
+ wchar_t *token = LIBC_NAMESPACE::wcstok(single, L"", &buf);
+ ASSERT_TRUE(token[0] == L'_');
+ ASSERT_TRUE(token[1] == L'\0');
+
+ wchar_t multiple[] = L"1,2";
+ token = LIBC_NAMESPACE::wcstok(multiple, L":", &buf);
+ ASSERT_TRUE(multiple[0] == L'1');
+ ASSERT_TRUE(multiple[1] == L',');
+ ASSERT_TRUE(multiple[2] == L'2');
+ ASSERT_TRUE(multiple[3] == L'\0');
+}
+
+TEST(LlvmLibcStrTokTest, DelimiterAsFirstCharacterShouldBeIgnored) {
+ wchar_t *buf;
+ wchar_t src[] = L".123";
+ wchar_t *token = LIBC_NAMESPACE::wcstok(src, L".", &buf);
+ ASSERT_TRUE(token[0] == L'1');
+ ASSERT_TRUE(token[1] == L'2');
+ ASSERT_TRUE(token[2] == L'3');
+ ASSERT_TRUE(token[3] == L'\0');
+}
+
+TEST(LlvmLibcStrTokTest, DelimiterIsMiddleCharacter) {
+ wchar_t src[] = L"12,34";
+ wchar_t *buf;
+ wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",", &buf);
+ ASSERT_TRUE(token[0] == L'1');
+ ASSERT_TRUE(token[1] == L'2');
+ ASSERT_TRUE(token[2] == L'\0');
+}
+
+TEST(LlvmLibcStrTokTest, DelimiterAsLastCharacterShouldBeIgnored) {
+ wchar_t src[] = L"1234:";
+ wchar_t *buf;
+ wchar_t *token = LIBC_NAMESPACE::wcstok(src, L":", &buf);
+ ASSERT_TRUE(token[0] == L'1');
+ ASSERT_TRUE(token[1] == L'2');
+ ASSERT_TRUE(token[2] == L'3');
+ ASSERT_TRUE(token[3] == L'4');
+ ASSERT_TRUE(token[4] == L'\0');
+}
+
+TEST(LlvmLibcStrTokTest, MultipleDelimiters) {
+ wchar_t src[] = L"12,.34";
+ wchar_t *buf;
+ wchar_t *token;
+
+ token = LIBC_NAMESPACE::wcstok(src, L".", &buf);
+ ASSERT_TRUE(token[0] == L'1');
+ ASSERT_TRUE(token[1] == L'2');
+ ASSERT_TRUE(token[2] == L',');
+ ASSERT_TRUE(token[3] == L'\0');
+
+ token = LIBC_NAMESPACE::wcstok(src, L".,", &buf);
+ ASSERT_TRUE(token[0] == L'1');
+ ASSERT_TRUE(token[1] == L'2');
+ ASSERT_TRUE(token[2] == L'\0');
+
+ token = LIBC_NAMESPACE::wcstok(src, L",.", &buf);
+ ASSERT_TRUE(token[0] == L'1');
+ ASSERT_TRUE(token[1] == L'2');
+ ASSERT_TRUE(token[2] == L'\0');
+
+ token = LIBC_NAMESPACE::wcstok(src, L":,.", &buf);
+ ASSERT_TRUE(token[0] == L'1');
+ ASSERT_TRUE(token[1] == L'2');
+ ASSERT_TRUE(token[2] == L'\0');
+}
+
+TEST(LlvmLibcStrTokTest, ShouldNotGoPastNullTerminator) {
+ wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'};
+ wchar_t *buf;
+ wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",", &buf);
+ ASSERT_TRUE(token[0] == L'1');
+ ASSERT_TRUE(token[1] == L'2');
+ ASSERT_TRUE(token[2] == L'\0');
+}
+
+TEST(LlvmLibcStrTokTest, SubsequentCallsShouldFindFollowingDelimiters) {
+ wchar_t src[] = L"12,34.56";
+ wchar_t *buf;
+ wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &buf);
+ ASSERT_TRUE(token[0] == L'1');
+ ASSERT_TRUE(token[1] == L'2');
+ ASSERT_TRUE(token[2] == L'\0');
+
+ token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &buf);
+ ASSERT_TRUE(token[0] == L'3');
+ ASSERT_TRUE(token[1] == L'4');
+ ASSERT_TRUE(token[2] == L'\0');
+
+ token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &buf);
+ ASSERT_TRUE(token[0] == L'5');
+ ASSERT_TRUE(token[1] == L'6');
+ ASSERT_TRUE(token[2] == L'\0');
+
+ token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
+ ASSERT_EQ(token, nullptr);
+ // Subsequent calls after hitting the end of the string should also return
+ // nullptr.
+ token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
+ ASSERT_EQ(token, nullptr);
+}
+
+TEST(LlvmLibcStrTokTest, DelimitersShouldNotBeIncludedInToken) {
+ wchar_t *buf;
+ wchar_t src[] = L"__ab__:_cd__:__ef__:__";
+ wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &buf);
+ ASSERT_TRUE(token[0] == L'a');
+ ASSERT_TRUE(token[1] == L'b');
+ ASSERT_TRUE(token[2] == L'\0');
+
+ token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &buf);
+ ASSERT_TRUE(token[0] == L'c');
+ ASSERT_TRUE(token[1] == L'd');
+ ASSERT_TRUE(token[2] == L'\0');
+
+ token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &buf);
+ ASSERT_TRUE(token[0] == L'e');
+ ASSERT_TRUE(token[1] == L'f');
+ ASSERT_TRUE(token[2] == L'\0');
+
+ token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
+ ASSERT_EQ(token, nullptr);
+}
>From a562d1c5c9ddb8b632b96c780134e05eced6877d Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Fri, 27 Jun 2025 16:27:39 +0000
Subject: [PATCH 4/5] refactored and replaced tests to mimic strtok_r
---
libc/src/wchar/wcstok.cpp | 52 +++----
libc/src/wchar/wcstok.h | 2 +-
libc/test/src/wchar/wcstok_test.cpp | 204 ++++++++++++++++------------
3 files changed, 149 insertions(+), 109 deletions(-)
diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index a02332ac1fbbd..b34f6613b732f 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -13,34 +13,38 @@
namespace LIBC_NAMESPACE_DECL {
+bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
+ for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; delim_ptr++)
+ if (wc == *delim_ptr)
+ return true;
+ return false;
+}
+
LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
(wchar_t *__restrict str, const wchar_t *__restrict delim,
- wchar_t **__restrict ptr)) {
- if (str == nullptr)
- str = *ptr;
-
- bool foundTokenStart = false;
- wchar_t *out = nullptr;
- wchar_t *str_ptr;
- for (str_ptr = str; *str_ptr != L'\0'; str_ptr++) {
- bool inDelim = false;
- for (const wchar_t *delim_ptr = delim; *delim_ptr != L'\0' && !inDelim;
- delim_ptr++)
- if (*str_ptr == *delim_ptr)
- inDelim = true;
-
- if (!inDelim && !foundTokenStart) {
- foundTokenStart = true;
- out = str_ptr;
- } else if (inDelim && foundTokenStart) {
- *str_ptr = L'\0';
- *ptr = str_ptr + 1;
- return out;
- }
+ wchar_t **__restrict context)) {
+ if (str == nullptr) {
+ if (*context == nullptr)  // no saved position: nothing left to tokenize
+ return nullptr;
+
+ str = *context;  // resume where the previous call stopped
}
- *ptr = str_ptr;
- return out;
+ wchar_t *tok_start, *tok_end;
+ for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim);
+ tok_start++)
+ ;
+
+ for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim);
+ tok_end++)
+ ;
+
+ if (*tok_end != L'\0') {
+ *tok_end = L'\0';
+ tok_end++;
+ }
+ *context = tok_end;
+ return *tok_start == L'\0' ? nullptr : tok_start;
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcstok.h b/libc/src/wchar/wcstok.h
index 9f41ea37a947a..5e673ff4e89b9 100644
--- a/libc/src/wchar/wcstok.h
+++ b/libc/src/wchar/wcstok.h
@@ -15,7 +15,7 @@
namespace LIBC_NAMESPACE_DECL {
wchar_t *wcstok(wchar_t *__restrict str, const wchar_t *__restrict delim,
- wchar_t **__restrict ptr);
+ wchar_t **__restrict context);
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/wchar/wcstok_test.cpp b/libc/test/src/wchar/wcstok_test.cpp
index 79153ccc0adad..02b5b49d55ce3 100644
--- a/libc/test/src/wchar/wcstok_test.cpp
+++ b/libc/test/src/wchar/wcstok_test.cpp
@@ -11,135 +11,171 @@
#include "src/wchar/wcstok.h"
#include "test/UnitTest/Test.h"
-TEST(LlvmLibcStrTokTest, NoTokenFound) {
- wchar_t empty[] = L"";
- wchar_t *buf;
- ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &buf), nullptr);
- ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &buf), nullptr);
-
- wchar_t single[] = L"_";
- wchar_t *token = LIBC_NAMESPACE::wcstok(single, L"", &buf);
- ASSERT_TRUE(token[0] == L'_');
- ASSERT_TRUE(token[1] == L'\0');
-
- wchar_t multiple[] = L"1,2";
- token = LIBC_NAMESPACE::wcstok(multiple, L":", &buf);
- ASSERT_TRUE(multiple[0] == L'1');
- ASSERT_TRUE(multiple[1] == L',');
- ASSERT_TRUE(multiple[2] == L'2');
- ASSERT_TRUE(multiple[3] == L'\0');
+TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
+ { // Empty source and delimiter string.
+ wchar_t empty[] = L"";
+ wchar_t *reserve = nullptr;
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
+ // Another call to ensure that 'reserve' is not in a bad state.
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr);
+ }
+ { // Empty source and single character delimiter string.
+ wchar_t empty[] = L"";
+ wchar_t *reserve = nullptr;
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
+ // Another call to ensure that 'reserve' is not in a bad state.
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
+ }
+ { // Same wchar_tacter source and delimiter string.
+ wchar_t single[] = L"_";
+ wchar_t *reserve = nullptr;
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
+ // Another call to ensure that 'reserve' is not in a bad state.
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
+ }
+ { // Multiple wchar_tacter source and single wchar_tacter delimiter string.
+ wchar_t multiple[] = L"1,2";
+ wchar_t *reserve = nullptr;
+ wchar_t *tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
+ ASSERT_TRUE(tok[0] == L'1');
+ ASSERT_TRUE(tok[1] == L',');
+ ASSERT_TRUE(tok[2] == L'2');
+ ASSERT_TRUE(tok[3] == L'\0');
+ // Another call to ensure that 'reserve' is not in a bad state.
+ tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
+ ASSERT_TRUE(tok[0] == L'1');
+ ASSERT_TRUE(tok[1] == L',');
+ ASSERT_TRUE(tok[2] == L'2');
+ ASSERT_TRUE(tok[3] == L'\0');
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
+ }
}
-TEST(LlvmLibcStrTokTest, DelimiterAsFirstCharacterShouldBeIgnored) {
- wchar_t *buf;
+TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsFirstCharacterShouldBeIgnored) {
wchar_t src[] = L".123";
- wchar_t *token = LIBC_NAMESPACE::wcstok(src, L".", &buf);
- ASSERT_TRUE(token[0] == L'1');
- ASSERT_TRUE(token[1] == L'2');
- ASSERT_TRUE(token[2] == L'3');
- ASSERT_TRUE(token[3] == L'\0');
+ wchar_t *reserve = nullptr;
+ wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
+ ASSERT_TRUE(tok[0] == L'1');
+ ASSERT_TRUE(tok[1] == L'2');
+ ASSERT_TRUE(tok[2] == L'3');
+ ASSERT_TRUE(tok[3] == L'\0');
+ // Another call to ensure that 'reserve' is not in a bad state.
+ tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
+ ASSERT_TRUE(tok[0] == L'1');
+ ASSERT_TRUE(tok[1] == L'2');
+ ASSERT_TRUE(tok[2] == L'3');
+ ASSERT_TRUE(tok[3] == L'\0');
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L".", &reserve), nullptr);
}
-TEST(LlvmLibcStrTokTest, DelimiterIsMiddleCharacter) {
+TEST(LlvmLibcWCSTokReentrantTest, DelimiterIsMiddleCharacter) {
wchar_t src[] = L"12,34";
- wchar_t *buf;
- wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",", &buf);
- ASSERT_TRUE(token[0] == L'1');
- ASSERT_TRUE(token[1] == L'2');
- ASSERT_TRUE(token[2] == L'\0');
+ wchar_t *reserve = nullptr;
+ wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+ ASSERT_TRUE(tok[0] == L'1');
+ ASSERT_TRUE(tok[1] == L'2');
+ ASSERT_TRUE(tok[2] == L'\0');
+ // Another call to ensure that 'reserve' is not in a bad state.
+ tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+ ASSERT_TRUE(tok[0] == L'1');
+ ASSERT_TRUE(tok[1] == L'2');
+ ASSERT_TRUE(tok[2] == L'\0');
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
}
-TEST(LlvmLibcStrTokTest, DelimiterAsLastCharacterShouldBeIgnored) {
+TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsLastCharacterShouldBeIgnored) {
wchar_t src[] = L"1234:";
- wchar_t *buf;
- wchar_t *token = LIBC_NAMESPACE::wcstok(src, L":", &buf);
- ASSERT_TRUE(token[0] == L'1');
- ASSERT_TRUE(token[1] == L'2');
- ASSERT_TRUE(token[2] == L'3');
- ASSERT_TRUE(token[3] == L'4');
- ASSERT_TRUE(token[4] == L'\0');
+ wchar_t *reserve = nullptr;
+ wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
+ ASSERT_TRUE(tok[0] == L'1');
+ ASSERT_TRUE(tok[1] == L'2');
+ ASSERT_TRUE(tok[2] == L'3');
+ ASSERT_TRUE(tok[3] == L'4');
+ ASSERT_TRUE(tok[4] == L'\0');
+ // Another call to ensure that 'reserve' is not in a bad state.
+ tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
+ ASSERT_TRUE(tok[0] == L'1');
+ ASSERT_TRUE(tok[1] == L'2');
+ ASSERT_TRUE(tok[2] == L'3');
+ ASSERT_TRUE(tok[3] == L'4');
+ ASSERT_TRUE(tok[4] == L'\0');
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
}
-TEST(LlvmLibcStrTokTest, MultipleDelimiters) {
- wchar_t src[] = L"12,.34";
- wchar_t *buf;
- wchar_t *token;
-
- token = LIBC_NAMESPACE::wcstok(src, L".", &buf);
- ASSERT_TRUE(token[0] == L'1');
- ASSERT_TRUE(token[1] == L'2');
- ASSERT_TRUE(token[2] == L',');
- ASSERT_TRUE(token[3] == L'\0');
-
- token = LIBC_NAMESPACE::wcstok(src, L".,", &buf);
- ASSERT_TRUE(token[0] == L'1');
- ASSERT_TRUE(token[1] == L'2');
- ASSERT_TRUE(token[2] == L'\0');
-
- token = LIBC_NAMESPACE::wcstok(src, L",.", &buf);
- ASSERT_TRUE(token[0] == L'1');
- ASSERT_TRUE(token[1] == L'2');
- ASSERT_TRUE(token[2] == L'\0');
-
- token = LIBC_NAMESPACE::wcstok(src, L":,.", &buf);
- ASSERT_TRUE(token[0] == L'1');
- ASSERT_TRUE(token[1] == L'2');
- ASSERT_TRUE(token[2] == L'\0');
+TEST(LlvmLibcWCSTokReentrantTest, ShouldNotGoPastNullTerminator) {
+ wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'};
+ wchar_t *reserve = nullptr;
+ wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+ ASSERT_TRUE(tok[0] == L'1');
+ ASSERT_TRUE(tok[1] == L'2');
+ ASSERT_TRUE(tok[2] == L'\0');
+ // Another call to ensure that 'reserve' is not in a bad state.
+ tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+ ASSERT_TRUE(tok[0] == L'1');
+ ASSERT_TRUE(tok[1] == L'2');
+ ASSERT_TRUE(tok[2] == L'\0');
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
}
-TEST(LlvmLibcStrTokTest, ShouldNotGoPastNullTerminator) {
- wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'};
- wchar_t *buf;
- wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",", &buf);
- ASSERT_TRUE(token[0] == L'1');
- ASSERT_TRUE(token[1] == L'2');
- ASSERT_TRUE(token[2] == L'\0');
+TEST(LlvmLibcWCSTokReentrantTest,
+ ShouldReturnNullptrWhenBothSrcAndSaveptrAreNull) {
+ wchar_t *src = nullptr;
+ wchar_t *reserve = nullptr;
+ // When both src and reserve are null there is nothing to tokenize, so
+ // wcstok must return nullptr rather than dereference a null pointer.
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(src, L",", &reserve), nullptr);
+ // Neither src nor reserve may be modified by that call.
+ ASSERT_EQ(src, nullptr);
+ ASSERT_EQ(reserve, nullptr);
+}
-TEST(LlvmLibcStrTokTest, SubsequentCallsShouldFindFollowingDelimiters) {
+TEST(LlvmLibcWCSTokReentrantTest,
+ SubsequentCallsShouldFindFollowingDelimiters) {
wchar_t src[] = L"12,34.56";
- wchar_t *buf;
- wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &buf);
+ wchar_t *reserve = nullptr;
+ wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &reserve);
ASSERT_TRUE(token[0] == L'1');
ASSERT_TRUE(token[1] == L'2');
ASSERT_TRUE(token[2] == L'\0');
- token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &buf);
+ token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
ASSERT_TRUE(token[0] == L'3');
ASSERT_TRUE(token[1] == L'4');
ASSERT_TRUE(token[2] == L'\0');
- token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &buf);
+ token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
ASSERT_TRUE(token[0] == L'5');
ASSERT_TRUE(token[1] == L'6');
ASSERT_TRUE(token[2] == L'\0');
-
- token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
+ token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
ASSERT_EQ(token, nullptr);
// Subsequent calls after hitting the end of the string should also return
// nullptr.
- token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
+ token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
ASSERT_EQ(token, nullptr);
}
-TEST(LlvmLibcStrTokTest, DelimitersShouldNotBeIncludedInToken) {
- wchar_t *buf;
+TEST(LlvmLibcWCSTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
wchar_t src[] = L"__ab__:_cd__:__ef__:__";
- wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &buf);
+ wchar_t *reserve = nullptr;
+ wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &reserve);
ASSERT_TRUE(token[0] == L'a');
ASSERT_TRUE(token[1] == L'b');
ASSERT_TRUE(token[2] == L'\0');
- token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &buf);
+ token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &reserve);
ASSERT_TRUE(token[0] == L'c');
ASSERT_TRUE(token[1] == L'd');
ASSERT_TRUE(token[2] == L'\0');
- token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &buf);
+ token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &reserve);
ASSERT_TRUE(token[0] == L'e');
ASSERT_TRUE(token[1] == L'f');
ASSERT_TRUE(token[2] == L'\0');
- token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
+ token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
ASSERT_EQ(token, nullptr);
}
>From 15f0166593dbe7193af95e675b7c7950285cb2aa Mon Sep 17 00:00:00 2001
From: Uzair Nawaz <uzairnawaz at google.com>
Date: Mon, 30 Jun 2025 17:06:18 +0000
Subject: [PATCH 5/5] formatting
---
libc/src/wchar/wcstok.cpp | 8 ++++----
libc/test/src/wchar/wcstok_test.cpp | 4 ++--
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
index b34f6613b732f..291efc15e158a 100644
--- a/libc/src/wchar/wcstok.cpp
+++ b/libc/src/wchar/wcstok.cpp
@@ -14,7 +14,7 @@
namespace LIBC_NAMESPACE_DECL {
bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
- for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; delim_ptr++)
+ for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr)
if (wc == *delim_ptr)
return true;
return false;
@@ -32,16 +32,16 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
wchar_t *tok_start, *tok_end;
for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim);
- tok_start++)
+ ++tok_start)
;
for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim);
- tok_end++)
+ ++tok_end)
;
if (*tok_end != L'\0') {
*tok_end = L'\0';
- tok_end++;
+ ++tok_end;
}
*context = tok_end;
return *tok_start == L'\0' ? nullptr : tok_start;
diff --git a/libc/test/src/wchar/wcstok_test.cpp b/libc/test/src/wchar/wcstok_test.cpp
index 02b5b49d55ce3..7106e9f2fab5e 100644
--- a/libc/test/src/wchar/wcstok_test.cpp
+++ b/libc/test/src/wchar/wcstok_test.cpp
@@ -28,7 +28,7 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
}
- { // Same wchar_tacter source and delimiter string.
+ { // Same character source and delimiter string.
wchar_t single[] = L"_";
wchar_t *reserve = nullptr;
ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
@@ -36,7 +36,7 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
}
- { // Multiple wchar_tacter source and single wchar_tacter delimiter string.
+ { // Multiple character source and single character delimiter string.
wchar_t multiple[] = L"1,2";
wchar_t *reserve = nullptr;
wchar_t *tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
More information about the libc-commits
mailing list