[libc-commits] [libc] 71dd4e1 - [libc] fix strsep()/strtok()/strtok_r() "subsequent searches" behavior. (#154370)

via libc-commits libc-commits at lists.llvm.org
Thu Aug 21 06:32:39 PDT 2025


Author: enh-google
Date: 2025-08-21T09:32:35-04:00
New Revision: 71dd4e17dd6493986e4b3647288d8e4c22a86c44

URL: https://github.com/llvm/llvm-project/commit/71dd4e17dd6493986e4b3647288d8e4c22a86c44
DIFF: https://github.com/llvm/llvm-project/commit/71dd4e17dd6493986e4b3647288d8e4c22a86c44.diff

LOG: [libc] fix strsep()/strtok()/strtok_r() "subsequent searches" behavior. (#154370)

These functions turned out to have the same bug that was in wcstok()
(fixed by 4fc9801), so add the missing tests and fix the code in a way
that matches wcstok().

Also fix incorrect test expectations in existing tests.

Also update the BUILD.bazel files to actually build the strsep() test.

Added: 
    

Modified: 
    libc/src/string/string_utils.h
    libc/test/src/string/strsep_test.cpp
    libc/test/src/string/strtok_r_test.cpp
    libc/test/src/string/strtok_test.cpp
    utils/bazel/llvm-project-overlay/libc/BUILD.bazel
    utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel

Removed: 
    


################################################################################
diff  --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index ea2e7b2e9eb4c..ce461581b9d95 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -202,33 +202,36 @@ LIBC_INLINE size_t complementary_span(const char *src, const char *segment) {
 template <bool SkipDelim = true>
 LIBC_INLINE char *string_token(char *__restrict src,
                                const char *__restrict delimiter_string,
-                               char **__restrict saveptr) {
-  // Return nullptr immediately if both src AND saveptr are nullptr
-  if (LIBC_UNLIKELY(src == nullptr && ((src = *saveptr) == nullptr)))
+                               char **__restrict context) {
+  // Return nullptr immediately if both src AND context are nullptr
+  if (LIBC_UNLIKELY(src == nullptr && ((src = *context) == nullptr)))
     return nullptr;
 
   static_assert(CHAR_BIT == 8, "bitset of 256 assumes char is 8 bits");
-  cpp::bitset<256> delimiter_set;
+  cpp::bitset<256> delims;
   for (; *delimiter_string != '\0'; ++delimiter_string)
-    delimiter_set.set(static_cast<size_t>(*delimiter_string));
+    delims.set(static_cast<size_t>(*delimiter_string));
 
+  char *tok_start = src;
   if constexpr (SkipDelim)
-    for (; *src != '\0' && delimiter_set.test(static_cast<size_t>(*src)); ++src)
-      ;
-  if (*src == '\0') {
-    *saveptr = src;
+    while (*tok_start != '\0' && delims.test(static_cast<size_t>(*tok_start)))
+      ++tok_start;
+  if (*tok_start == '\0' && SkipDelim) {
+    *context = nullptr;
     return nullptr;
   }
-  char *token = src;
-  for (; *src != '\0'; ++src) {
-    if (delimiter_set.test(static_cast<size_t>(*src))) {
-      *src = '\0';
-      ++src;
-      break;
-    }
+
+  char *tok_end = tok_start;
+  while (*tok_end != '\0' && !delims.test(static_cast<size_t>(*tok_end)))
+    ++tok_end;
+
+  if (*tok_end == '\0') {
+    *context = nullptr;
+  } else {
+    *tok_end = '\0';
+    *context = tok_end + 1;
   }
-  *saveptr = src;
-  return token;
+  return tok_start;
 }
 
 LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src,

diff  --git a/libc/test/src/string/strsep_test.cpp b/libc/test/src/string/strsep_test.cpp
index 06318dea4cb68..e2a5d52bbeddb 100644
--- a/libc/test/src/string/strsep_test.cpp
+++ b/libc/test/src/string/strsep_test.cpp
@@ -18,21 +18,21 @@ TEST(LlvmLibcStrsepTest, NullSrc) {
 TEST(LlvmLibcStrsepTest, NoTokenFound) {
   {
     char s[] = "";
-    char *string = s, *orig = s;
+    char *string = s;
     EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, ""), nullptr);
-    EXPECT_EQ(orig, string);
+    EXPECT_TRUE(string == nullptr);
   }
   {
     char s[] = "abcde";
     char *string = s, *orig = s;
     EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, ""), orig);
-    EXPECT_EQ(string, orig + 5);
+    EXPECT_TRUE(string == nullptr);
   }
   {
     char s[] = "abcde";
     char *string = s, *orig = s;
     EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, "fghijk"), orig);
-    EXPECT_EQ(string, orig + 5);
+    EXPECT_TRUE(string == nullptr);
   }
 }
 
@@ -53,6 +53,14 @@ TEST(LlvmLibcStrsepTest, DelimitersShouldNotBeIncludedInToken) {
   }
 }
 
+TEST(LlvmLibcStrsepTest, SubsequentSearchesReturnNull) {
+  char s[] = "a";
+  char *string = s;
+  ASSERT_STREQ(LIBC_NAMESPACE::strsep(&string, ":"), "a");
+  ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
+}
+
 #if defined(LIBC_ADD_NULL_CHECKS)
 
 TEST(LlvmLibcStrsepTest, CrashOnNullPtr) {

diff  --git a/libc/test/src/string/strtok_r_test.cpp b/libc/test/src/string/strtok_r_test.cpp
index fdc27bae23c97..a19390d0b0c2d 100644
--- a/libc/test/src/string/strtok_r_test.cpp
+++ b/libc/test/src/string/strtok_r_test.cpp
@@ -122,3 +122,12 @@ TEST(LlvmLibcStrTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
   token = LIBC_NAMESPACE::strtok_r(nullptr, "_:,_", &reserve);
   ASSERT_STREQ(token, nullptr);
 }
+
+TEST(LlvmLibcStrTokReentrantTest, SubsequentSearchesReturnNull) {
+  char src[] = "a";
+  char *reserve = nullptr;
+  char *token = LIBC_NAMESPACE::strtok_r(src, ":", &reserve);
+  ASSERT_STREQ(token, "a");
+  ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
+}

diff  --git a/libc/test/src/string/strtok_test.cpp b/libc/test/src/string/strtok_test.cpp
index b82065309e00c..76efeddda6f4a 100644
--- a/libc/test/src/string/strtok_test.cpp
+++ b/libc/test/src/string/strtok_test.cpp
@@ -76,3 +76,10 @@ TEST(LlvmLibcStrTokTest, DelimitersShouldNotBeIncludedInToken) {
   token = LIBC_NAMESPACE::strtok(nullptr, "_:,_");
   ASSERT_STREQ(token, nullptr);
 }
+
+TEST(LlvmLibcStrTokTest, SubsequentSearchesReturnNull) {
+  char src[] = "a";
+  ASSERT_STREQ("a", LIBC_NAMESPACE::strtok(src, ":"));
+  ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
+}

diff  --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 6316f100f348c..830042a400f33 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -5140,6 +5140,19 @@ libc_function(
     ],
 )
 
+libc_function(
+    name = "strsep",
+    srcs = ["src/string/strsep.cpp"],
+    hdrs = ["src/string/strsep.h"],
+    deps = [
+        ":__support_common",
+        ":__support_macros_null_check",
+        ":hdr_signal_macros",
+        ":string_memory_utils",
+        ":string_utils",
+    ],
+)
+
 libc_function(
     name = "strstr",
     srcs = ["src/string/strstr.cpp"],

diff  --git a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
index 5f50b10516fb5..d90992417a721 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
@@ -59,6 +59,15 @@ libc_test(
     ],
 )
 
+libc_test(
+    name = "strsep_test",
+    srcs = ["strsep_test.cpp"],
+    deps = [
+        "//libc:hdr_signal_macros",
+        "//libc:strsep",
+    ],
+)
+
 libc_test(
     name = "strstr_test",
     srcs = ["strstr_test.cpp"],


        


More information about the libc-commits mailing list