[libc-commits] [libc] 667843c - [libc] Add strtok implementation.

via libc-commits libc-commits at lists.llvm.org
Tue Aug 11 10:45:21 PDT 2020


Author: cgyurgyik
Date: 2020-08-11T13:44:53-04:00
New Revision: 667843cc07c934b7fc20de0cc86323ae5fde48ce

URL: https://github.com/llvm/llvm-project/commit/667843cc07c934b7fc20de0cc86323ae5fde48ce
DIFF: https://github.com/llvm/llvm-project/commit/667843cc07c934b7fc20de0cc86323ae5fde48ce.diff

LOG: [libc] Add strtok implementation.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D85615

Added: 
    libc/src/string/strtok.cpp
    libc/src/string/strtok.h
    libc/test/src/string/strtok_test.cpp

Modified: 
    libc/config/linux/aarch64/entrypoints.txt
    libc/config/linux/x86_64/entrypoints.txt
    libc/src/string/CMakeLists.txt
    libc/src/string/string_utils.h
    libc/test/src/string/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index aad7819ef551..2be1d51bd788 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -34,6 +34,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.string.strrchr
     libc.src.string.strspn
     libc.src.string.strstr
+    libc.src.string.strtok
 )
 
 set(TARGET_LIBM_ENTRYPOINTS

diff  --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index ca3a2bd6c477..364fdc32c6f8 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -52,6 +52,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.string.strrchr
     libc.src.string.strspn
     libc.src.string.strstr
+    libc.src.string.strtok
 
     # sys/mman.h entrypoints
     libc.src.sys.mman.mmap

diff  --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index b6a15c3f460b..0193420ac52d 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -132,6 +132,16 @@ add_entrypoint_object(
     .string_utils
 )
 
+add_entrypoint_object(
+  strtok
+  SRCS
+    strtok.cpp
+  HDRS
+    strtok.h
+  DEPENDS
+    libc.utils.CPP.standalone_cpp # presumably for utils/CPP/Bitset.h (included by strtok.cpp)
+)
+
 # Helper to define a function with multiple implementations
 # - Computes flags to satisfy required/rejected features and arch,
 # - Declares an entry point,

diff  --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 54c1bac96985..c3b444a505d0 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -9,8 +9,6 @@
 #ifndef LIBC_SRC_STRING_STRING_UTILS_H
 #define LIBC_SRC_STRING_STRING_UTILS_H
 
-#include "src/string/memory_utils/utils.h"
-
 #include "utils/CPP/Bitset.h"
 #include <stddef.h> // size_t
 

diff  --git a/libc/src/string/strtok.cpp b/libc/src/string/strtok.cpp
new file mode 100644
index 000000000000..19d9133c15e7
--- /dev/null
+++ b/libc/src/string/strtok.cpp
@@ -0,0 +1,42 @@
+//===-- Implementation of strtok ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strtok.h"
+
+#include "src/__support/common.h"
+#include "utils/CPP/Bitset.h"
+
+namespace __llvm_libc {
+
+static char *strtok_str = nullptr; // Where strtok(nullptr, ...) resumes.
+
+// Returns the next token of src (or of the string saved by the previous call
+// when src is null), overwriting the delimiter that ends it with '\0'.
+// Returns nullptr when no token is left or no string was ever supplied.
+char *LLVM_LIBC_ENTRYPOINT(strtok)(char *src, const char *delimiter_string) {
+  // Characters are converted to unsigned char before indexing the set: plain
+  // char may be negative, which is not a valid index into 256 entries.
+  cpp::Bitset<256> delimiter_set;
+  for (; *delimiter_string; ++delimiter_string)
+    delimiter_set.set(static_cast<unsigned char>(*delimiter_string));
+
+  src = src ? src : strtok_str;
+  if (!src) // No string has been supplied yet; nothing to tokenize.
+    return nullptr;
+  while (*src && delimiter_set.test(static_cast<unsigned char>(*src)))
+    ++src; // Skip leading delimiters.
+  char *const token = *src ? src : nullptr;
+  while (*src && !delimiter_set.test(static_cast<unsigned char>(*src)))
+    ++src; // Advance to the end of the token.
+  if (*src) // Terminate the token and resume after the delimiter next time.
+    *src++ = '\0';
+  strtok_str = src;
+  return token;
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/string/strtok.h b/libc/src/string/strtok.h
new file mode 100644
index 000000000000..c16e764d393c
--- /dev/null
+++ b/libc/src/string/strtok.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for strtok ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_STRTOK_H
+#define LLVM_LIBC_SRC_STRING_STRTOK_H
+
+namespace __llvm_libc {
+// Splits src into tokens; pass nullptr to continue with the previous string.
+char *strtok(char *src, const char *delimiter_string);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_STRTOK_H

diff  --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
index f7c580dd1876..8dfffa65175c 100644
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -132,6 +132,16 @@ add_libc_unittest(
     libc.src.string.strpbrk
 )
 
+add_libc_unittest(
+  strtok_test
+  SUITE
+    libc_string_unittests
+  SRCS
+    strtok_test.cpp
+  DEPENDS
+    libc.src.string.strtok # the entrypoint exercised by strtok_test.cpp
+)
+
 # Tests all implementations that can run on the host.
 function(add_libc_multi_impl_test name)
   get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations)

diff  --git a/libc/test/src/string/strtok_test.cpp b/libc/test/src/string/strtok_test.cpp
new file mode 100644
index 000000000000..311b9cf28fd4
--- /dev/null
+++ b/libc/test/src/string/strtok_test.cpp
@@ -0,0 +1,78 @@
+//===-- Unittests for strtok ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strtok.h"
+#include "utils/UnitTest/Test.h"
+
+TEST(StrTokTest, NoTokenFound) {
+  char no_chars[] = ""; // An empty string never yields a token.
+  ASSERT_STREQ(__llvm_libc::strtok(no_chars, ""), nullptr);
+  ASSERT_STREQ(__llvm_libc::strtok(no_chars, "_"), nullptr);
+  // With no delimiters given, the whole string is a single token.
+  char one_char[] = "_";
+  ASSERT_STREQ(__llvm_libc::strtok(one_char, ""), "_");
+  // A delimiter that does not occur in the string leaves it unsplit.
+  char no_match[] = "1,2";
+  ASSERT_STREQ(__llvm_libc::strtok(no_match, ":"), "1,2");
+}
+
+TEST(StrTokTest, DelimiterAsFirstCharacterShouldBeIgnored) {
+  char input[] = ".123"; // The leading '.' is skipped, not returned as empty.
+  ASSERT_STREQ(__llvm_libc::strtok(input, "."), "123");
+}
+
+TEST(StrTokTest, DelimiterIsMiddleCharacter) {
+  char input[] = "12,34"; // Only the part before ',' is the first token.
+  ASSERT_STREQ(__llvm_libc::strtok(input, ","), "12");
+}
+
+TEST(StrTokTest, DelimiterAsLastCharacterShouldBeIgnored) {
+  char input[] = "1234:"; // The trailing ':' ends the token and is dropped.
+  ASSERT_STREQ(__llvm_libc::strtok(input, ":"), "1234");
+}
+
+TEST(StrTokTest, MultipleDelimiters) {
+  char input[] = "12,.34"; // Reused below; earlier calls leave '\0's in it.
+  ASSERT_STREQ(__llvm_libc::strtok(input, "."), "12,");
+  ASSERT_STREQ(__llvm_libc::strtok(input, ".,"), "12");
+  ASSERT_STREQ(__llvm_libc::strtok(input, ",."), "12");
+  ASSERT_STREQ(__llvm_libc::strtok(input, ":,."), "12");
+}
+
+TEST(StrTokTest, ShouldNotGoPastNullTerminator) {
+  char input[] = {'1', '2', '\0', ',', '3'}; // ",3" lies beyond the '\0'.
+  ASSERT_STREQ(__llvm_libc::strtok(input, ","), "12");
+}
+
+TEST(StrTokTest, SubsequentCallsShouldFindFollowingDelimiters) {
+  char input[] = "12,34.56";
+
+  // The first call is given the buffer; ',' ends the first token.
+  ASSERT_STREQ(__llvm_libc::strtok(input, ",."), "12");
+  // Passing nullptr resumes right after the previously found delimiter.
+  ASSERT_STREQ(__llvm_libc::strtok(nullptr, ",."), "34");
+  // The last token is ended by the string's own terminator.
+  ASSERT_STREQ(__llvm_libc::strtok(nullptr, ",."), "56");
+  // Nothing is left, whatever the delimiter set is.
+  ASSERT_STREQ(__llvm_libc::strtok(nullptr, "_:,_"), nullptr);
+
+  // Once the end has been reached, further calls keep returning nullptr.
+  ASSERT_STREQ(__llvm_libc::strtok(nullptr, "_:,_"), nullptr);
+}
+
+TEST(StrTokTest, DelimitersShouldNotBeIncludedInToken) {
+  char input[] = "__ab__:_cd__:__ef__:__";
+  // Runs of delimiters around each token are skipped entirely.
+  ASSERT_STREQ(__llvm_libc::strtok(input, "_:"), "ab");
+  // The delimiter set may be reordered or extended between calls.
+  ASSERT_STREQ(__llvm_libc::strtok(nullptr, ":_"), "cd");
+  ASSERT_STREQ(__llvm_libc::strtok(nullptr, "_:,"), "ef");
+
+  // Only delimiters remain after "ef", so no further token is produced.
+  ASSERT_STREQ(__llvm_libc::strtok(nullptr, "_:,_"), nullptr);
+}


        


More information about the libc-commits mailing list