[libc-commits] [libc] 7a33b70 - [libc] wcstok implementation (#145989)

via libc-commits libc-commits at lists.llvm.org
Mon Jun 30 10:41:03 PDT 2025


Author: Uzair Nawaz
Date: 2025-06-30T10:41:00-07:00
New Revision: 7a33b709b1e2bd3032593b259a741905dce874f8

URL: https://github.com/llvm/llvm-project/commit/7a33b709b1e2bd3032593b259a741905dce874f8
DIFF: https://github.com/llvm/llvm-project/commit/7a33b709b1e2bd3032593b259a741905dce874f8.diff

LOG: [libc] wcstok implementation (#145989)

Implemented wcstok and added tests

Added: 
    libc/src/wchar/wcstok.cpp
    libc/src/wchar/wcstok.h
    libc/test/src/wchar/wcstok_test.cpp

Modified: 
    libc/config/linux/x86_64/entrypoints.txt
    libc/include/wchar.yaml
    libc/src/wchar/CMakeLists.txt
    libc/test/src/wchar/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 7a954a480e698..263669803c294 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -387,6 +387,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.wchar.wmemchr
     libc.src.wchar.wcpcpy
     libc.src.wchar.wcpncpy
+    libc.src.wchar.wcstok
 
     # sys/uio.h entrypoints
     libc.src.sys.uio.writev

diff  --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 9e862ff984494..0430a33343ff3 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -196,6 +196,14 @@ functions:
     arguments:
       - type: wchar_t *__restrict
       - type: const wchar_t *__restrict
+  - name: wcstok
+    standards:
+      - stdc
+    return_type: wchar_t *
+    arguments:
+      - type: wchar_t *__restrict
+      - type: const wchar_t *__restrict
+      - type: wchar_t** __restrict
   - name: wcpcpy
     standards:
       - stdc

diff  --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 867aab6755bf6..833735d716399 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -45,6 +45,16 @@ add_entrypoint_object(
     libc.src.__support.wctype_utils
 )
 
+add_entrypoint_object(
+  wcstok
+  SRCS
+    wcstok.cpp
+  HDRS
+    wcstok.h
+  DEPENDS
+    libc.hdr.types.wchar_t
+)
+
 add_entrypoint_object(
   wcrtomb
   SRCS

diff  --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp
new file mode 100644
index 0000000000000..291efc15e158a
--- /dev/null
+++ b/libc/src/wchar/wcstok.cpp
@@ -0,0 +1,50 @@
+//===-- Implementation of wcstok ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcstok.h"
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Returns true iff `wc` occurs in the null-terminated string `delimiters`.
+static bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
+  while (*delimiters != L'\0' && *delimiters != wc)
+    ++delimiters;
+  return *delimiters != L'\0'; // Stopped before the terminator => matched.
+}
+
+// Returns the next token of `str` (or of the string saved in `*context` when
+// `str` is null), or nullptr if none remains. Overwrites the delimiter ending
+// the token (if any) with L'\0' and saves the resume position in `*context`.
+LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
+                   (wchar_t *__restrict str, const wchar_t *__restrict delim,
+                    wchar_t **__restrict context)) {
+  if (str == nullptr) {
+    str = *context;
+    if (str == nullptr)
+      return nullptr;
+  }
+
+  // Skip leading delimiters to find where the token begins.
+  wchar_t *token = str;
+  while (*token != L'\0' && isADelimeter(*token, delim))
+    ++token;
+
+  // Advance to the first delimiter (or the terminator) after the token.
+  wchar_t *cursor = token;
+  while (*cursor != L'\0' && !isADelimeter(*cursor, delim))
+    ++cursor;
+
+  if (*cursor != L'\0')
+    *cursor++ = L'\0'; // Cut the token off and resume past the delimiter.
+  *context = cursor;
+  return *token == L'\0' ? nullptr : token;
+}
+
+} // namespace LIBC_NAMESPACE_DECL

diff  --git a/libc/src/wchar/wcstok.h b/libc/src/wchar/wcstok.h
new file mode 100644
index 0000000000000..5e673ff4e89b9
--- /dev/null
+++ b/libc/src/wchar/wcstok.h
@@ -0,0 +1,22 @@
+//===-- Implementation header for wcstok ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOK_H
+#define LLVM_LIBC_SRC_WCHAR_WCSTOK_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+// Tokenizes a wide string; `*context` holds the position to resume from.
+wchar_t *wcstok(wchar_t *__restrict str, const wchar_t *__restrict delim,
+                wchar_t **__restrict context);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSTOK_H

diff  --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 02949c68d81dd..60f2c2e0f1667 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -123,6 +123,16 @@ add_libc_test(
     libc.src.wchar.wcschr
 )
 
+add_libc_test(
+  wcstok_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcstok_test.cpp
+  DEPENDS
+    libc.src.wchar.wcstok
+)
+
 add_libc_test(
   wcsncmp_test
   SUITE

diff  --git a/libc/test/src/wchar/wcstok_test.cpp b/libc/test/src/wchar/wcstok_test.cpp
new file mode 100644
index 0000000000000..7106e9f2fab5e
--- /dev/null
+++ b/libc/test/src/wchar/wcstok_test.cpp
@@ -0,0 +1,181 @@
+//===-- Unittests for wcstok ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/wchar/wcstok.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
+  { // Empty source and delimiter string.
+    wchar_t empty[] = L"";
+    wchar_t *reserve = nullptr;
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
+    // Another call to ensure that 'reserve' is not in a bad state.
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr);
+  }
+  { // Empty source and single character delimiter string.
+    wchar_t empty[] = L"";
+    wchar_t *reserve = nullptr;
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
+    // Another call to ensure that 'reserve' is not in a bad state.
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
+  }
+  { // Same character source and delimiter string.
+    wchar_t single[] = L"_";
+    wchar_t *reserve = nullptr;
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
+    // Another call to ensure that 'reserve' is not in a bad state.
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
+  }
+  { // Delimiter never occurs in the source: the whole source is one token.
+    wchar_t multiple[] = L"1,2";
+    wchar_t *reserve = nullptr;
+    wchar_t *tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
+    ASSERT_TRUE(tok[0] == L'1');
+    ASSERT_TRUE(tok[1] == L',');
+    ASSERT_TRUE(tok[2] == L'2');
+    ASSERT_TRUE(tok[3] == L'\0');
+    // Restart on the unmodified source: the same token is returned again.
+    tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
+    ASSERT_TRUE(tok[0] == L'1');
+    ASSERT_TRUE(tok[1] == L',');
+    ASSERT_TRUE(tok[2] == L'2');
+    ASSERT_TRUE(tok[3] == L'\0');
+    ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
+  }
+}
+
+TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsFirstCharacterShouldBeIgnored) {
+  wchar_t src[] = L".123";
+  wchar_t *reserve = nullptr;
+  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'3');
+  ASSERT_TRUE(tok[3] == L'\0');
+  // Passing 'src' again restarts from the beginning and finds the same token.
+  tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'3');
+  ASSERT_TRUE(tok[3] == L'\0');
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L".", &reserve), nullptr);
+}
+
+TEST(LlvmLibcWCSTokReentrantTest, DelimiterIsMiddleCharacter) {
+  wchar_t src[] = L"12,34";
+  wchar_t *reserve = nullptr;
+  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'\0');
+  // Restart: the first call replaced ',' with L'\0', so 'src' is now L"12".
+  tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'\0');
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
+}
+
+TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsLastCharacterShouldBeIgnored) {
+  wchar_t src[] = L"1234:";
+  wchar_t *reserve = nullptr;
+  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'3');
+  ASSERT_TRUE(tok[3] == L'4');
+  ASSERT_TRUE(tok[4] == L'\0');
+  // Restart: the trailing ':' is already L'\0', so the same token is found.
+  tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'3');
+  ASSERT_TRUE(tok[3] == L'4');
+  ASSERT_TRUE(tok[4] == L'\0');
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
+}
+
+TEST(LlvmLibcWCSTokReentrantTest, ShouldNotGoPastNullTerminator) {
+  wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'};
+  wchar_t *reserve = nullptr;
+  wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'\0');
+  // Restart: characters stored after the L'\0' must still be ignored.
+  tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
+  ASSERT_TRUE(tok[0] == L'1');
+  ASSERT_TRUE(tok[1] == L'2');
+  ASSERT_TRUE(tok[2] == L'\0');
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
+}
+
+TEST(LlvmLibcWCSTokReentrantTest,
+     ShouldReturnNullptrWhenBothSrcAndSaveptrAreNull) {
+  wchar_t *src = nullptr;
+  wchar_t *reserve = nullptr;
+  // With a null source and a null saved position there is nothing to resume,
+  // so nullptr must be returned rather than crashing.
+  ASSERT_EQ(LIBC_NAMESPACE::wcstok(src, L",", &reserve), nullptr);
+  // Neither src nor reserve may be changed by that call.
+  ASSERT_EQ(src, nullptr);
+  ASSERT_EQ(reserve, nullptr);
+}
+
+TEST(LlvmLibcWCSTokReentrantTest,
+     SubsequentCallsShouldFindFollowingDelimiters) {
+  wchar_t src[] = L"12,34.56";
+  wchar_t *reserve = nullptr;
+  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &reserve);
+  ASSERT_TRUE(token[0] == L'1');
+  ASSERT_TRUE(token[1] == L'2');
+  ASSERT_TRUE(token[2] == L'\0');
+  // A null source resumes from the position the previous call left in 'reserve'.
+  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
+  ASSERT_TRUE(token[0] == L'3');
+  ASSERT_TRUE(token[1] == L'4');
+  ASSERT_TRUE(token[2] == L'\0');
+
+  token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
+  ASSERT_TRUE(token[0] == L'5');
+  ASSERT_TRUE(token[1] == L'6');
+  ASSERT_TRUE(token[2] == L'\0');
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
+  ASSERT_EQ(token, nullptr);
+  // The end of the string has been reached, so every further call must keep
+  // returning nullptr.
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
+  ASSERT_EQ(token, nullptr);
+}
+
+TEST(LlvmLibcWCSTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
+  wchar_t src[] = L"__ab__:_cd__:__ef__:__";
+  wchar_t *reserve = nullptr;
+  wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &reserve);
+  ASSERT_TRUE(token[0] == L'a');
+  ASSERT_TRUE(token[1] == L'b');
+  ASSERT_TRUE(token[2] == L'\0');
+  // The delimiter string may differ between calls; here only its order does.
+  token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &reserve);
+  ASSERT_TRUE(token[0] == L'c');
+  ASSERT_TRUE(token[1] == L'd');
+  ASSERT_TRUE(token[2] == L'\0');
+  // An extra delimiter (',') that is absent from the source changes nothing.
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &reserve);
+  ASSERT_TRUE(token[0] == L'e');
+  ASSERT_TRUE(token[1] == L'f');
+  ASSERT_TRUE(token[2] == L'\0');
+  // Only delimiter characters remain after "ef", so no further token exists.
+  token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
+  ASSERT_EQ(token, nullptr);
+}


        


More information about the libc-commits mailing list