[libc-commits] [libc] bc45bab - [libc] Add strtok_r implementation.
via libc-commits
libc-commits at lists.llvm.org
Thu Aug 13 12:52:51 PDT 2020
Author: parallels
Date: 2020-08-13T15:51:38-04:00
New Revision: bc45bab7eb6d6138442ba0dfab11faa734566ebf
URL: https://github.com/llvm/llvm-project/commit/bc45bab7eb6d6138442ba0dfab11faa734566ebf
DIFF: https://github.com/llvm/llvm-project/commit/bc45bab7eb6d6138442ba0dfab11faa734566ebf.diff
LOG: [libc] Add strtok_r implementation.
Reviewed By: sivachandra
Differential Revision: https://reviews.llvm.org/D85779
Added:
libc/src/string/strtok_r.cpp
libc/src/string/strtok_r.h
libc/test/src/string/strtok_r_test.cpp
Modified:
libc/config/linux/aarch64/entrypoints.txt
libc/config/linux/api.td
libc/config/linux/x86_64/entrypoints.txt
libc/spec/posix.td
libc/src/string/CMakeLists.txt
libc/src/string/string_utils.h
libc/src/string/strtok.cpp
libc/test/src/string/CMakeLists.txt
Removed:
################################################################################
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 2be1d51bd788..fe63403ae221 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -35,6 +35,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.strspn
libc.src.string.strstr
libc.src.string.strtok
+ libc.src.string.strtok_r
)
set(TARGET_LIBM_ENTRYPOINTS
diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 151924748dc1..6b50c4284ae2 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -236,6 +236,7 @@ def StringAPI : PublicAPI<"string.h"> {
"strspn",
"strstr",
"strtok",
+ "strtok_r",
"strxfrm",
];
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 364fdc32c6f8..35ca8bbeaf68 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -53,6 +53,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.string.strspn
libc.src.string.strstr
libc.src.string.strtok
+ libc.src.string.strtok_r
# sys/mman.h entrypoints
libc.src.sys.mman.mmap
diff --git a/libc/spec/posix.td b/libc/spec/posix.td
index 732b6a6be250..9463169b6a65 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -11,8 +11,12 @@ def RestrictStructSigactionPtr : RestrictedPtrType<StructSigaction>;
def ConstRestrictStructSigactionPtr : ConstType<RestrictStructSigactionPtr>;
def POSIX : StandardSpec<"POSIX"> {
+ // TODO: Change naming so that they're consistent with other files.
PtrType CharPtr = PtrType<CharType>;
ConstType ConstCharPtr = ConstType<CharPtr>;
+ RestrictedPtrType RestrictedCharPtr = RestrictedPtrType<CharType>;
+ ConstType ConstRestrictedCharPtr = ConstType<RestrictedCharPtr>;
+ RestrictedPtrType CharRestrictedDoublePtr = RestrictedPtrType<CharPtr>;
NamedType OffTType = NamedType<"off_t">;
NamedType SSizeTType = NamedType<"ssize_t">;
@@ -221,6 +225,11 @@ def POSIX : StandardSpec<"POSIX"> {
RetValSpec<SizeTType>,
[ArgSpec<ConstCharPtr>, ArgSpec<SizeTType>]
>,
+        FunctionSpec<
+            "strtok_r",
+            RetValSpec<CharPtr>,
+            // POSIX signature: char *strtok_r(char *restrict str,
+            //                                 const char *restrict delim,
+            //                                 char **restrict saveptr);
+            // All three arguments must be listed, in order.
+            [ArgSpec<RestrictedCharPtr>,
+             ArgSpec<ConstRestrictedCharPtr>,
+             ArgSpec<CharRestrictedDoublePtr>]
+        >,
]
>;
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index 0193420ac52d..8efe8c89e9e7 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -139,7 +139,17 @@ add_entrypoint_object(
HDRS
strtok.h
DEPENDS
- libc.utils.CPP.standalone_cpp
+ .string_utils
+)
+
+add_entrypoint_object(
+ strtok_r
+ SRCS
+ strtok_r.cpp
+ HDRS
+ strtok_r.h
+ DEPENDS
+ .string_utils
)
# Helper to define a function with multiple implementations
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index c3b444a505d0..93a26c8fda6a 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -28,6 +28,39 @@ static inline size_t complementary_span(const char *src, const char *segment) {
return src - initial;
}
+// Shared implementation of strtok and strtok_r.
+//
+// Tokenization starts at 'src', or at '*saveptr' when 'src' is a nullptr
+// (a continuation call). Leading characters that appear in
+// 'delimiter_string' are skipped; the token is the following run of
+// characters that do not appear in 'delimiter_string'. If the token is
+// followed by a delimiter, that delimiter is overwritten with a null
+// terminator. '*saveptr' is updated to the position from which the next
+// continuation call should resume.
+//
+// Returns a pointer to the start of the token, or a nullptr when no token is
+// left. A nullptr is also returned, without touching '*saveptr', when both
+// 'src' and '*saveptr' are nullptr, i.e. when there is nothing to tokenize.
+static inline char *string_token(char *src, const char *delimiter_string,
+                                 char **saveptr) {
+  src = src ? src : *saveptr;
+  // A continuation call without any saved position has nothing to scan;
+  // bail out instead of dereferencing a null pointer below.
+  if (!src)
+    return nullptr;
+
+  // The set is indexed through unsigned char: where plain char is signed,
+  // bytes >= 0x80 would otherwise turn into out-of-range indices.
+  cpp::Bitset<256> delimiter_set;
+  for (; *delimiter_string; ++delimiter_string)
+    delimiter_set.set(static_cast<unsigned char>(*delimiter_string));
+  const auto is_delimiter = [&delimiter_set](char c) {
+    return delimiter_set.test(static_cast<unsigned char>(c));
+  };
+
+  // Skip the delimiters in front of the token.
+  for (; *src && is_delimiter(*src); ++src)
+    ;
+  if (!*src) {
+    // Only delimiters (or nothing) were left: remember the terminator so
+    // that further continuation calls keep returning a nullptr.
+    *saveptr = src;
+    return nullptr;
+  }
+  char *token = src;
+  // Advance to the end of the token.
+  for (; *src && !is_delimiter(*src); ++src)
+    ;
+  if (*src) {
+    // Terminate the token in place and resume after the delimiter.
+    *src = '\0';
+    ++src;
+  }
+  *saveptr = src;
+  return token;
+}
+
} // namespace internal
} // namespace __llvm_libc
diff --git a/libc/src/string/strtok.cpp b/libc/src/string/strtok.cpp
index 19d9133c15e7..6bd02e24c9a4 100644
--- a/libc/src/string/strtok.cpp
+++ b/libc/src/string/strtok.cpp
@@ -9,34 +9,16 @@
#include "src/string/strtok.h"
#include "src/__support/common.h"
-#include "utils/CPP/Bitset.h"
+#include "src/string/string_utils.h"
namespace __llvm_libc {
static char *strtok_str = nullptr;
+// TODO: Place restrict qualifier where necessary for this and other function
+// arguments.
char *LLVM_LIBC_ENTRYPOINT(strtok)(char *src, const char *delimiter_string) {
- cpp::Bitset<256> delimiter_set;
- for (; *delimiter_string; ++delimiter_string)
- delimiter_set.set(*delimiter_string);
-
- src = src ? src : strtok_str;
- for (; *src && delimiter_set.test(*src); ++src)
- ;
- if (!*src) {
- strtok_str = src;
- return nullptr;
- }
- char *token = src;
- for (; *src && !delimiter_set.test(*src); ++src)
- ;
-
- strtok_str = src;
- if (*strtok_str) {
- *strtok_str = '\0';
- ++strtok_str;
- }
- return token;
+ return internal::string_token(src, delimiter_string, &strtok_str);
}
} // namespace __llvm_libc
diff --git a/libc/src/string/strtok_r.cpp b/libc/src/string/strtok_r.cpp
new file mode 100644
index 000000000000..61f39a0f647b
--- /dev/null
+++ b/libc/src/string/strtok_r.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of strtok_r ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strtok_r.h"
+
+#include "src/__support/common.h"
+#include "src/string/string_utils.h"
+
+namespace __llvm_libc {
+
+// Entrypoint for strtok_r. The tokenizer state is kept in the caller-provided
+// 'saveptr' rather than in a global; all the work is delegated to the helper
+// shared with strtok.
+char *LLVM_LIBC_ENTRYPOINT(strtok_r)(char *src, const char *delimiter_string,
+                                     char **saveptr) {
+  char *const token = internal::string_token(src, delimiter_string, saveptr);
+  return token;
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/string/strtok_r.h b/libc/src/string/strtok_r.h
new file mode 100644
index 000000000000..28fc40f1e5ff
--- /dev/null
+++ b/libc/src/string/strtok_r.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for strtok_r ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_STRTOK_R_H
+#define LLVM_LIBC_SRC_STRING_STRTOK_R_H
+
+namespace __llvm_libc {
+
+char *strtok_r(char *src, const char *delimiter_string, char **saveptr);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STRING_STRTOK_R_H
diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt
index 8dfffa65175c..df824cd18ecc 100644
--- a/libc/test/src/string/CMakeLists.txt
+++ b/libc/test/src/string/CMakeLists.txt
@@ -142,6 +142,16 @@ add_libc_unittest(
libc.src.string.strtok
)
+add_libc_unittest(
+ strtok_r_test
+ SUITE
+ libc_string_unittests
+ SRCS
+ strtok_r_test.cpp
+ DEPENDS
+ libc.src.string.strtok_r
+)
+
# Tests all implementations that can run on the host.
function(add_libc_multi_impl_test name)
get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations)
diff --git a/libc/test/src/string/strtok_r_test.cpp b/libc/test/src/string/strtok_r_test.cpp
new file mode 100644
index 000000000000..62df2f563a1b
--- /dev/null
+++ b/libc/test/src/string/strtok_r_test.cpp
@@ -0,0 +1,111 @@
+//===-- Unittests for strtok_r -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strtok_r.h"
+#include "utils/UnitTest/Test.h"
+
+TEST(StrTokReentrantTest, NoTokenFound) {
+  // Each case issues the initial call twice and then a continuation call
+  // (nullptr source) to check that the saved state is never left in a bad
+  // state.
+  { // Both the source and the delimiter string are empty.
+    char source[] = "";
+    const char *delims = "";
+    char *state = nullptr;
+    ASSERT_STREQ(__llvm_libc::strtok_r(source, delims, &state), nullptr);
+    ASSERT_STREQ(__llvm_libc::strtok_r(source, delims, &state), nullptr);
+    ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, delims, &state), nullptr);
+  }
+  { // Empty source with a one-character delimiter string.
+    char source[] = "";
+    const char *delims = "_";
+    char *state = nullptr;
+    ASSERT_STREQ(__llvm_libc::strtok_r(source, delims, &state), nullptr);
+    ASSERT_STREQ(__llvm_libc::strtok_r(source, delims, &state), nullptr);
+    ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, delims, &state), nullptr);
+  }
+  { // The source consists of nothing but the delimiter.
+    char source[] = "_";
+    const char *delims = "_";
+    char *state = nullptr;
+    ASSERT_STREQ(__llvm_libc::strtok_r(source, delims, &state), nullptr);
+    ASSERT_STREQ(__llvm_libc::strtok_r(source, delims, &state), nullptr);
+    ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, delims, &state), nullptr);
+  }
+  { // The delimiter never occurs, so the whole source is the only token.
+    char source[] = "1,2";
+    const char *delims = ":";
+    char *state = nullptr;
+    ASSERT_STREQ(__llvm_libc::strtok_r(source, delims, &state), "1,2");
+    ASSERT_STREQ(__llvm_libc::strtok_r(source, delims, &state), "1,2");
+    ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, delims, &state), nullptr);
+  }
+}
+
+TEST(StrTokReentrantTest, DelimiterAsFirstCharacterShouldBeIgnored) {
+  char input[] = ".123";
+  const char *delims = ".";
+  char *state = nullptr;
+  // The leading delimiter is skipped and is not part of the token.
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, delims, &state), "123");
+  // Restarting on the same buffer must give the same answer.
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, delims, &state), "123");
+  // Nothing is left for a continuation call.
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, delims, &state), nullptr);
+}
+
+TEST(StrTokReentrantTest, DelimiterIsMiddleCharacter) {
+  char input[] = "12,34";
+  const char *delims = ",";
+  char *state = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, delims, &state), "12");
+  // The first call terminated the token in place, so restarting on the same
+  // buffer sees only "12" and leaves nothing for the continuation call.
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, delims, &state), "12");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, delims, &state), nullptr);
+}
+
+TEST(StrTokReentrantTest, DelimiterAsLastCharacterShouldBeIgnored) {
+  char input[] = "1234:";
+  const char *delims = ":";
+  char *state = nullptr;
+  // The trailing delimiter ends the token and is not part of it.
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, delims, &state), "1234");
+  // Restarting on the same buffer must give the same answer.
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, delims, &state), "1234");
+  // Nothing is left for a continuation call.
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, delims, &state), nullptr);
+}
+
+TEST(StrTokReentrantTest, ShouldNotGoPastNullTerminator) {
+  // The buffer holds extra characters after the embedded null terminator;
+  // they must never show up in a token.
+  char input[] = {'1', '2', '\0', ',', '3'};
+  const char *delims = ",";
+  char *state = nullptr;
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, delims, &state), "12");
+  // Restarting on the same buffer must give the same answer.
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, delims, &state), "12");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, delims, &state), nullptr);
+}
+
+TEST(StrTokReentrantTest, SubsequentCallsShouldFindFollowingDelimiters) {
+  char input[] = "12,34.56";
+  char *state = nullptr;
+  // The initial call yields the first token; each continuation call yields
+  // the next one.
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, ",.", &state), "12");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ",.", &state), "34");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ",.", &state), "56");
+  // The source is exhausted: this call and every later one return nullptr,
+  // whatever the delimiter string is.
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_:,_", &state), nullptr);
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_:,_", &state), nullptr);
+}
+
+TEST(StrTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
+  char input[] = "__ab__:_cd__:__ef__:__";
+  char *state = nullptr;
+  // The delimiter string changes between calls; runs of delimiters around a
+  // token are never part of it.
+  ASSERT_STREQ(__llvm_libc::strtok_r(input, "_:", &state), "ab");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, ":_", &state), "cd");
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_:,", &state), "ef");
+  // Only delimiters remain after the last token.
+  ASSERT_STREQ(__llvm_libc::strtok_r(nullptr, "_:,_", &state), nullptr);
+}
More information about the libc-commits
mailing list