[libc-commits] [libc] 71dd4e1 - [libc] fix strsep()/strtok()/strtok_r() "subsequent searches" behavior. (#154370)
via libc-commits
libc-commits at lists.llvm.org
Thu Aug 21 06:32:39 PDT 2025
Author: enh-google
Date: 2025-08-21T09:32:35-04:00
New Revision: 71dd4e17dd6493986e4b3647288d8e4c22a86c44
URL: https://github.com/llvm/llvm-project/commit/71dd4e17dd6493986e4b3647288d8e4c22a86c44
DIFF: https://github.com/llvm/llvm-project/commit/71dd4e17dd6493986e4b3647288d8e4c22a86c44.diff
LOG: [libc] fix strsep()/strtok()/strtok_r() "subsequent searches" behavior. (#154370)
These functions turned out to have the same bug that was in wcstok()
(fixed by 4fc9801), so add the missing tests and fix the code in a way
that matches wcstok().
Also fix incorrect test expectations in existing tests.
Also update the BUILD.bazel files to actually build the strsep() test.
Added:
Modified:
libc/src/string/string_utils.h
libc/test/src/string/strsep_test.cpp
libc/test/src/string/strtok_r_test.cpp
libc/test/src/string/strtok_test.cpp
utils/bazel/llvm-project-overlay/libc/BUILD.bazel
utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
Removed:
################################################################################
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index ea2e7b2e9eb4c..ce461581b9d95 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -202,33 +202,36 @@ LIBC_INLINE size_t complementary_span(const char *src, const char *segment) {
template <bool SkipDelim = true>
LIBC_INLINE char *string_token(char *__restrict src,
const char *__restrict delimiter_string,
- char **__restrict saveptr) {
- // Return nullptr immediately if both src AND saveptr are nullptr
- if (LIBC_UNLIKELY(src == nullptr && ((src = *saveptr) == nullptr)))
+ char **__restrict context) {
+ // Return nullptr immediately if both src AND context are nullptr
+ if (LIBC_UNLIKELY(src == nullptr && ((src = *context) == nullptr)))
return nullptr;
static_assert(CHAR_BIT == 8, "bitset of 256 assumes char is 8 bits");
- cpp::bitset<256> delimiter_set;
+ cpp::bitset<256> delims;
for (; *delimiter_string != '\0'; ++delimiter_string)
- delimiter_set.set(static_cast<size_t>(*delimiter_string));
+ delims.set(static_cast<size_t>(*delimiter_string));
+ char *tok_start = src;
if constexpr (SkipDelim)
- for (; *src != '\0' && delimiter_set.test(static_cast<size_t>(*src)); ++src)
- ;
- if (*src == '\0') {
- *saveptr = src;
+ while (*tok_start != '\0' && delims.test(static_cast<size_t>(*tok_start)))
+ ++tok_start;
+ if (*tok_start == '\0' && SkipDelim) {
+ *context = nullptr;
return nullptr;
}
- char *token = src;
- for (; *src != '\0'; ++src) {
- if (delimiter_set.test(static_cast<size_t>(*src))) {
- *src = '\0';
- ++src;
- break;
- }
+
+ char *tok_end = tok_start;
+ while (*tok_end != '\0' && !delims.test(static_cast<size_t>(*tok_end)))
+ ++tok_end;
+
+ if (*tok_end == '\0') {
+ *context = nullptr;
+ } else {
+ *tok_end = '\0';
+ *context = tok_end + 1;
}
- *saveptr = src;
- return token;
+ return tok_start;
}
LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src,
diff --git a/libc/test/src/string/strsep_test.cpp b/libc/test/src/string/strsep_test.cpp
index 06318dea4cb68..e2a5d52bbeddb 100644
--- a/libc/test/src/string/strsep_test.cpp
+++ b/libc/test/src/string/strsep_test.cpp
@@ -18,21 +18,21 @@ TEST(LlvmLibcStrsepTest, NullSrc) {
TEST(LlvmLibcStrsepTest, NoTokenFound) {
{
char s[] = "";
- char *string = s, *orig = s;
+ char *string = s;
EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, ""), nullptr);
- EXPECT_EQ(orig, string);
+ EXPECT_TRUE(string == nullptr);
}
{
char s[] = "abcde";
char *string = s, *orig = s;
EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, ""), orig);
- EXPECT_EQ(string, orig + 5);
+ EXPECT_TRUE(string == nullptr);
}
{
char s[] = "abcde";
char *string = s, *orig = s;
EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, "fghijk"), orig);
- EXPECT_EQ(string, orig + 5);
+ EXPECT_TRUE(string == nullptr);
}
}
@@ -53,6 +53,14 @@ TEST(LlvmLibcStrsepTest, DelimitersShouldNotBeIncludedInToken) {
}
}
+TEST(LlvmLibcStrsepTest, SubsequentSearchesReturnNull) {
+ char s[] = "a";
+ char *string = s;
+ ASSERT_STREQ(LIBC_NAMESPACE::strsep(&string, ":"), "a");
+ ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
+}
+
#if defined(LIBC_ADD_NULL_CHECKS)
TEST(LlvmLibcStrsepTest, CrashOnNullPtr) {
diff --git a/libc/test/src/string/strtok_r_test.cpp b/libc/test/src/string/strtok_r_test.cpp
index fdc27bae23c97..a19390d0b0c2d 100644
--- a/libc/test/src/string/strtok_r_test.cpp
+++ b/libc/test/src/string/strtok_r_test.cpp
@@ -122,3 +122,12 @@ TEST(LlvmLibcStrTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
token = LIBC_NAMESPACE::strtok_r(nullptr, "_:,_", &reserve);
ASSERT_STREQ(token, nullptr);
}
+
+TEST(LlvmLibcStrTokReentrantTest, SubsequentSearchesReturnNull) {
+ char src[] = "a";
+ char *reserve = nullptr;
+ char *token = LIBC_NAMESPACE::strtok_r(src, ":", &reserve);
+ ASSERT_STREQ(token, "a");
+ ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
+}
diff --git a/libc/test/src/string/strtok_test.cpp b/libc/test/src/string/strtok_test.cpp
index b82065309e00c..76efeddda6f4a 100644
--- a/libc/test/src/string/strtok_test.cpp
+++ b/libc/test/src/string/strtok_test.cpp
@@ -76,3 +76,10 @@ TEST(LlvmLibcStrTokTest, DelimitersShouldNotBeIncludedInToken) {
token = LIBC_NAMESPACE::strtok(nullptr, "_:,_");
ASSERT_STREQ(token, nullptr);
}
+
+TEST(LlvmLibcStrTokTest, SubsequentSearchesReturnNull) {
+ char src[] = "a";
+ ASSERT_STREQ("a", LIBC_NAMESPACE::strtok(src, ":"));
+ ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
+}
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 6316f100f348c..830042a400f33 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -5140,6 +5140,19 @@ libc_function(
],
)
+libc_function(
+ name = "strsep",
+ srcs = ["src/string/strsep.cpp"],
+ hdrs = ["src/string/strsep.h"],
+ deps = [
+ ":__support_common",
+ ":__support_macros_null_check",
+ ":hdr_signal_macros",
+ ":string_memory_utils",
+ ":string_utils",
+ ],
+)
+
libc_function(
name = "strstr",
srcs = ["src/string/strstr.cpp"],
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
index 5f50b10516fb5..d90992417a721 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
@@ -59,6 +59,15 @@ libc_test(
],
)
+libc_test(
+ name = "strsep_test",
+ srcs = ["strsep_test.cpp"],
+ deps = [
+ "//libc:hdr_signal_macros",
+ "//libc:strsep",
+ ],
+)
+
libc_test(
name = "strstr_test",
srcs = ["strstr_test.cpp"],
More information about the libc-commits mailing list