[libc-commits] [libc] bc45bab - [libc] Add strtok_r implementation.

via libc-commits libc-commits at lists.llvm.org
Thu Aug 13 12:52:51 PDT 2020


Author: parallels
Date: 2020-08-13T15:51:38-04:00
New Revision: bc45bab7eb6d6138442ba0dfab11faa734566ebf

URL: https://github.com/llvm/llvm-project/commit/bc45bab7eb6d6138442ba0dfab11faa734566ebf
DIFF: https://github.com/llvm/llvm-project/commit/bc45bab7eb6d6138442ba0dfab11faa734566ebf.diff

LOG: [libc] Add strtok_r implementation.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D85779

Added: 
    libc/src/string/strtok_r.cpp
    libc/src/string/strtok_r.h
    libc/test/src/string/strtok_r_test.cpp

Modified: 
    libc/config/linux/aarch64/entrypoints.txt
    libc/config/linux/api.td
    libc/config/linux/x86_64/entrypoints.txt
    libc/spec/posix.td
    libc/src/string/CMakeLists.txt
    libc/src/string/string_utils.h
    libc/src/string/strtok.cpp
    libc/test/src/string/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 2be1d51bd788..fe63403ae221 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -35,6 +35,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.string.strspn
     libc.src.string.strstr
     libc.src.string.strtok
+    libc.src.string.strtok_r
 )
 
 set(TARGET_LIBM_ENTRYPOINTS

diff  --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 151924748dc1..6b50c4284ae2 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -236,6 +236,7 @@ def StringAPI : PublicAPI<"string.h"> {
     "strspn",
     "strstr",
     "strtok",
+    "strtok_r",
     "strxfrm",   
   ];
 

diff  --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 364fdc32c6f8..35ca8bbeaf68 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -53,6 +53,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.string.strspn
     libc.src.string.strstr
     libc.src.string.strtok
+    libc.src.string.strtok_r
 
     # sys/mman.h entrypoints
     libc.src.sys.mman.mmap

diff  --git a/libc/spec/posix.td b/libc/spec/posix.td
index 732b6a6be250..9463169b6a65 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -11,8 +11,12 @@ def RestrictStructSigactionPtr : RestrictedPtrType<StructSigaction>;
 def ConstRestrictStructSigactionPtr : ConstType<RestrictStructSigactionPtr>;
 
 def POSIX : StandardSpec<"POSIX"> {
+  // TODO: Change naming so that they're consistent with other files.
   PtrType CharPtr = PtrType<CharType>;
   ConstType ConstCharPtr = ConstType<CharPtr>;
+  RestrictedPtrType RestrictedCharPtr = RestrictedPtrType<CharType>;
+  ConstType ConstRestrictedCharPtr = ConstType<RestrictedCharPtr>;
+  RestrictedPtrType CharRestrictedDoublePtr = RestrictedPtrType<CharPtr>;
   NamedType OffTType = NamedType<"off_t">;
   NamedType SSizeTType = NamedType<"ssize_t">;
 
@@ -221,6 +225,11 @@ def POSIX : StandardSpec<"POSIX"> {
              RetValSpec<SizeTType>,
              [ArgSpec<ConstCharPtr>, ArgSpec<SizeTType>]
         >,
+        FunctionSpec<
+            "strtok_r", RetValSpec<CharPtr>, // Args: str, delimiters, saveptr.
+            [ArgSpec<RestrictedCharPtr>, ArgSpec<ConstRestrictedCharPtr>,
+             ArgSpec<CharRestrictedDoublePtr>]
+        >,
     ]
   >;
 

diff  --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index 0193420ac52d..8efe8c89e9e7 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -139,7 +139,17 @@ add_entrypoint_object(
   HDRS
     strtok.h
   DEPENDS
-    libc.utils.CPP.standalone_cpp
+    .string_utils
+)
+
+add_entrypoint_object(
+  strtok_r
+  SRCS
+    strtok_r.cpp
+  HDRS
+    strtok_r.h
+  DEPENDS
+    .string_utils
 )
 
 # Helper to define a function with multiple implementations

diff  --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index c3b444a505d0..93a26c8fda6a 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -28,6 +28,39 @@ static inline size_t complementary_span(const char *src, const char *segment) {
   return src - initial;
 }
 
+// Shared tokenizer behind strtok and strtok_r. Starting at 'src' (or at
+// '*saveptr' when 'src' is a nullptr), leading characters that appear in
+// 'delimiter_string' are skipped. The token is the following run of characters
+// up to, but not including, the next delimiter or the null terminator. A
+// delimiter ending the token is overwritten with a null terminator, and the
+// position to resume from is stored in '*saveptr' for subsequent calls.
+// Returns the token, or a nullptr when no token remains or when both 'src'
+// and '*saveptr' are nullptr. Characters are converted to unsigned char before
+// indexing the bitset so that bytes with the high bit set stay in range.
+static inline char *string_token(char *src, const char *delimiter_string,
+                                 char **saveptr) {
+  cpp::Bitset<256> delimiter_set;
+  for (; *delimiter_string; ++delimiter_string)
+    delimiter_set.set(static_cast<unsigned char>(*delimiter_string));
+
+  // Nothing to resume from: no new string and no saved position.
+  if (!src && !(src = *saveptr))
+    return nullptr;
+  for (; *src && delimiter_set.test(static_cast<unsigned char>(*src)); ++src)
+    ;
+  if (!*src) {
+    *saveptr = src;
+    return nullptr;
+  }
+  char *token = src;
+  for (; *src && !delimiter_set.test(static_cast<unsigned char>(*src)); ++src)
+    ;
+  if (*src)
+    *src++ = '\0';
+  *saveptr = src;
+  return token;
+}
+
 } // namespace internal
 } // namespace __llvm_libc
 

diff  --git a/libc/src/string/strtok.cpp b/libc/src/string/strtok.cpp
index 19d9133c15e7..6bd02e24c9a4 100644
--- a/libc/src/string/strtok.cpp
+++ b/libc/src/string/strtok.cpp
@@ -9,34 +9,16 @@
 #include "src/string/strtok.h"
 
 #include "src/__support/common.h"
-#include "utils/CPP/Bitset.h"
+#include "src/string/string_utils.h"
 
 namespace __llvm_libc {
 
 static char *strtok_str = nullptr;
 
+// Tokenizes 'src', saving the resume position in the static 'strtok_str'.
+// TODO: Add restrict qualifiers where necessary, here and in other functions.
 char *LLVM_LIBC_ENTRYPOINT(strtok)(char *src, const char *delimiter_string) {
-  cpp::Bitset<256> delimiter_set;
-  for (; *delimiter_string; ++delimiter_string)
-    delimiter_set.set(*delimiter_string);
-
-  src = src ? src : strtok_str;
-  for (; *src && delimiter_set.test(*src); ++src)
-    ;
-  if (!*src) {
-    strtok_str = src;
-    return nullptr;
-  }
-  char *token = src;
-  for (; *src && !delimiter_set.test(*src); ++src)
-    ;
-
-  strtok_str = src;
-  if (*strtok_str) {
-    *strtok_str = '\0';
-    ++strtok_str;
-  }
-  return token;
+  return internal::string_token(src, delimiter_string, &strtok_str);
 }
 
 } // namespace __llvm_libc

diff  --git a/libc/src/string/strtok_r.cpp b/libc/src/string/strtok_r.cpp
new file mode 100644
index 000000000000..61f39a0f647b
--- /dev/null
+++ b/libc/src/string/strtok_r.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of strtok_r ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strtok_r.h"
+
+#include "src/__support/common.h"
+#include "src/string/string_utils.h"
+
+namespace __llvm_libc {
+
+char *LLVM_LIBC_ENTRYPOINT(strtok_r)(char *src, const char *delimiter_string,
+                                     char **saveptr) {
+  return internal::string_token(src, delimiter_string, saveptr);
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/string/strtok_r.h b/libc/src/string/strtok_r.h
new file mode 100644
index 000000000000..28fc40f1e5ff
--- /dev/null
+++ b/libc/src/string/strtok_r.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for strtok_r ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_STRTOK_R_H
+#define LLVM_LIBC_SRC_STRING_STRTOK_R_H
+
+namespace __llvm_libc {
+
+char *strtok_r(char *src, const char *delimiter_string, char **saveptr);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_STRTOK_R_H

diff  --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
index 8dfffa65175c..df824cd18ecc 100644
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -142,6 +142,16 @@ add_libc_unittest(
     libc.src.string.strtok
 )
 
+add_libc_unittest(
+  strtok_r_test
+  SUITE
+    libc_string_unittests
+  SRCS
+    strtok_r_test.cpp
+  DEPENDS
+    libc.src.string.strtok_r
+)
+
 # Tests all implementations that can run on the host.
 function(add_libc_multi_impl_test name)
   get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations)

diff  --git a/libc/test/src/string/strtok_r_test.cpp b/libc/test/src/string/strtok_r_test.cpp
new file mode 100644
index 000000000000..62df2f563a1b
--- /dev/null
+++ b/libc/test/src/string/strtok_r_test.cpp
@@ -0,0 +1,111 @@
+//===-- Unittests for strtok_r -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strtok_r.h"
+#include "utils/UnitTest/Test.h"
+
+TEST(StrTokReentrantTest, NoTokenFound) {
+  { // Empty source and delimiter string.
+    char empty[] = "";
+    char *reserve = nullptr;
+    ASSERT_STREQ(__llvm_libc::strtok_r(empty, "", &reserve), nullptr);
+    // Another call to ensure that 'reserve' is not in a bad state.
+    ASSERT_STREQ(__llvm_libc::strtok_r(empty, "", &reserve), nullptr);
+    ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "", &reserve), nullptr);
+  }
+  { // Empty source and single character delimiter string.
+    char empty[] = "";
+    char *reserve = nullptr;
+    ASSERT_STREQ(__llvm_libc::strtok_r(empty, "_", &reserve), nullptr);
+    // Another call to ensure that 'reserve' is not in a bad state.
+    ASSERT_STREQ(__llvm_libc::strtok_r(empty, "_", &reserve), nullptr);
+    ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_", &reserve), nullptr);
+  }
+  { // Same character source and delimiter string.
+    char single[] = "_";
+    char *reserve = nullptr;
+    ASSERT_STREQ(__llvm_libc::strtok_r(single, "_", &reserve), nullptr);
+    // Another call to ensure that 'reserve' is not in a bad state.
+    ASSERT_STREQ(__llvm_libc::strtok_r(single, "_", &reserve), nullptr);
+    ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_", &reserve), nullptr);
+  }
+  { // Multiple character source and single character delimiter string.
+    char multiple[] = "1,2";
+    char *reserve = nullptr;
+    ASSERT_STREQ(__llvm_libc::strtok_r(multiple, ":", &reserve), "1,2");
+    // Another call to ensure that 'reserve' is not in a bad state.
+    ASSERT_STREQ(__llvm_libc::strtok_r(multiple, ":", &reserve), "1,2");
+    ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ":", &reserve), nullptr);
+  }
+}
+
+TEST(StrTokReentrantTest, DelimiterAsFirstCharacterShouldBeIgnored) {
+  char src[] = ".123";
+  char *reserve = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(src, ".", &reserve), "123");
+  // Another call to ensure that 'reserve' is not in a bad state.
+  ASSERT_STREQ(__llvm_libc::strtok_r(src, ".", &reserve), "123");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ".", &reserve), nullptr);
+}
+
+TEST(StrTokReentrantTest, DelimiterIsMiddleCharacter) {
+  char src[] = "12,34";
+  char *reserve = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(src, ",", &reserve), "12");
+  // Another call to ensure that 'reserve' is not in a bad state.
+  ASSERT_STREQ(__llvm_libc::strtok_r(src, ",", &reserve), "12");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ",", &reserve), nullptr);
+}
+
+TEST(StrTokReentrantTest, DelimiterAsLastCharacterShouldBeIgnored) {
+  char src[] = "1234:";
+  char *reserve = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(src, ":", &reserve), "1234");
+  // Another call to ensure that 'reserve' is not in a bad state.
+  ASSERT_STREQ(__llvm_libc::strtok_r(src, ":", &reserve), "1234");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ":", &reserve), nullptr);
+}
+
+TEST(StrTokReentrantTest, ShouldNotGoPastNullTerminator) {
+  char src[] = {'1', '2', '\0', ',', '3'};
+  char *reserve = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(src, ",", &reserve), "12");
+  // Another call to ensure that 'reserve' is not in a bad state.
+  ASSERT_STREQ(__llvm_libc::strtok_r(src, ",", &reserve), "12");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ",", &reserve), nullptr);
+}
+
+TEST(StrTokReentrantTest, SubsequentCallsShouldFindFollowingDelimiters) {
+  char src[] = "12,34.56";
+  char *reserve = nullptr;
+  char *token = __llvm_libc::strtok_r(src, ",.", &reserve);
+  ASSERT_STREQ(token, "12");
+  token = __llvm_libc::strtok_r(nullptr, ",.", &reserve);
+  ASSERT_STREQ(token, "34");
+  token = __llvm_libc::strtok_r(nullptr, ",.", &reserve);
+  ASSERT_STREQ(token, "56");
+  token = __llvm_libc::strtok_r(nullptr, "_:,_", &reserve);
+  ASSERT_STREQ(token, nullptr);
+  // Subsequent calls after hitting the end of the string should also return
+  // nullptr.
+  token = __llvm_libc::strtok_r(nullptr, "_:,_", &reserve);
+  ASSERT_STREQ(token, nullptr);
+}
+
+TEST(StrTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
+  char src[] = "__ab__:_cd__:__ef__:__";
+  char *reserve = nullptr;
+  char *token = __llvm_libc::strtok_r(src, "_:", &reserve);
+  ASSERT_STREQ(token, "ab");
+  token = __llvm_libc::strtok_r(nullptr, ":_", &reserve);
+  ASSERT_STREQ(token, "cd");
+  token = __llvm_libc::strtok_r(nullptr, "_:,", &reserve);
+  ASSERT_STREQ(token, "ef");
+  token = __llvm_libc::strtok_r(nullptr, "_:,_", &reserve);
+  ASSERT_STREQ(token, nullptr);
+}


        


More information about the libc-commits mailing list