[libc-commits] [libc] [llvm] [libc] fixed signed char issues in strsep()/strtok()/strtok_r(). (PR #156705)
via libc-commits
libc-commits at lists.llvm.org
Wed Sep 3 09:16:46 PDT 2025
https://github.com/enh-google created https://github.com/llvm/llvm-project/pull/156705
This fixes the signed char issues in strsep()/strtok()/strtok_r(). It also adds the missing tests for all the related functions (even the ones that were already right), and adds the missing Bazel build rules.
From 125bbdb62884228ae657aae89f754803fbdcfcf7 Mon Sep 17 00:00:00 2001
From: Elliott Hughes <enh at google.com>
Date: Wed, 3 Sep 2025 16:13:46 +0000
Subject: [PATCH] [libc] fixed signed char issues in
strsep()/strtok()/strtok_r().
Also add the missing tests for all the related functions (even the ones
that were already right), and add the missing bazel build rules.
---
libc/src/string/string_utils.h | 14 +++++++-------
libc/test/src/string/strcspn_test.cpp | 4 ++++
libc/test/src/string/strpbrk_test.cpp | 4 ++++
libc/test/src/string/strsep_test.cpp | 8 ++++++++
libc/test/src/string/strspn_test.cpp | 4 ++++
libc/test/src/string/strtok_r_test.cpp | 8 ++++++++
libc/test/src/string/strtok_test.cpp | 7 +++++++
.../bazel/llvm-project-overlay/libc/BUILD.bazel | 10 ++++++++++
.../libc/test/src/string/BUILD.bazel | 16 ++++++++++++++++
9 files changed, 68 insertions(+), 7 deletions(-)
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 26e9adde0d66e..10803488b6cf5 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -212,28 +212,28 @@ LIBC_INLINE char *string_token(char *__restrict src,
static_assert(CHAR_BIT == 8, "bitset of 256 assumes char is 8 bits");
cpp::bitset<256> delims;
for (; *delimiter_string != '\0'; ++delimiter_string)
- delims.set(static_cast<size_t>(*delimiter_string));
+ delims.set(*reinterpret_cast<const unsigned char *>(delimiter_string));
- char *tok_start = src;
+ unsigned char *tok_start = reinterpret_cast<unsigned char *>(src);
if constexpr (SkipDelim)
- while (*tok_start != '\0' && delims.test(static_cast<size_t>(*tok_start)))
+ while (*tok_start != '\0' && delims.test(*tok_start))
++tok_start;
if (*tok_start == '\0' && SkipDelim) {
*context = nullptr;
return nullptr;
}
- char *tok_end = tok_start;
- while (*tok_end != '\0' && !delims.test(static_cast<size_t>(*tok_end)))
+ unsigned char *tok_end = tok_start;
+ while (*tok_end != '\0' && !delims.test(*tok_end))
++tok_end;
if (*tok_end == '\0') {
*context = nullptr;
} else {
*tok_end = '\0';
- *context = tok_end + 1;
+ *context = reinterpret_cast<char *>(tok_end + 1);
}
- return tok_start;
+ return reinterpret_cast<char *>(tok_start);
}
LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src,
diff --git a/libc/test/src/string/strcspn_test.cpp b/libc/test/src/string/strcspn_test.cpp
index d83b3cf4fdfe8..ec98f72e37113 100644
--- a/libc/test/src/string/strcspn_test.cpp
+++ b/libc/test/src/string/strcspn_test.cpp
@@ -48,3 +48,7 @@ TEST(LlvmLibcStrCSpnTest, DuplicatedCharactersNotPartOfComplementarySpan) {
EXPECT_EQ(LIBC_NAMESPACE::strcspn("aaaa", "aa"), size_t{0});
EXPECT_EQ(LIBC_NAMESPACE::strcspn("aaaa", "baa"), size_t{0});
}
+
+TEST(LlvmLibcStrCSpnTest, TopBitSet) {
+ EXPECT_EQ(LIBC_NAMESPACE::strcspn("hello\x80world", "\x80"), size_t{5});
+}
diff --git a/libc/test/src/string/strpbrk_test.cpp b/libc/test/src/string/strpbrk_test.cpp
index fbe14da12ac10..cc802460d10be 100644
--- a/libc/test/src/string/strpbrk_test.cpp
+++ b/libc/test/src/string/strpbrk_test.cpp
@@ -60,3 +60,7 @@ TEST(LlvmLibcStrPBrkTest, FindsFirstOfRepeated) {
TEST(LlvmLibcStrPBrkTest, FindsFirstInBreakset) {
EXPECT_STREQ(LIBC_NAMESPACE::strpbrk("12345", "34"), "345");
}
+
+TEST(LlvmLibcStrPBrkTest, TopBitSet) {
+ EXPECT_STREQ(LIBC_NAMESPACE::strpbrk("hello\x80world", "\x80 "), "\x80world");
+}
diff --git a/libc/test/src/string/strsep_test.cpp b/libc/test/src/string/strsep_test.cpp
index e2a5d52bbeddb..553edd99604ef 100644
--- a/libc/test/src/string/strsep_test.cpp
+++ b/libc/test/src/string/strsep_test.cpp
@@ -61,6 +61,14 @@ TEST(LlvmLibcStrsepTest, SubsequentSearchesReturnNull) {
ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
}
+TEST(LlvmLibcStrsepTest, TopBitSet) {
+ char top_bit_set_str[] = "hello\x80world";
+ char *p = top_bit_set_str;
+ ASSERT_STREQ(LIBC_NAMESPACE::strsep(&p, "\x80"), "hello");
+ ASSERT_STREQ(LIBC_NAMESPACE::strsep(&p, "\x80"), "world");
+ ASSERT_EQ(LIBC_NAMESPACE::strsep(&p, "\x80"), nullptr);
+}
+
#if defined(LIBC_ADD_NULL_CHECKS)
TEST(LlvmLibcStrsepTest, CrashOnNullPtr) {
diff --git a/libc/test/src/string/strspn_test.cpp b/libc/test/src/string/strspn_test.cpp
index 82f9b2aef0dfd..813612f09fc16 100644
--- a/libc/test/src/string/strspn_test.cpp
+++ b/libc/test/src/string/strspn_test.cpp
@@ -85,6 +85,10 @@ TEST(LlvmLibcStrSpnTest, DuplicatedCharactersToBeSearchedForShouldStillMatch) {
EXPECT_EQ(LIBC_NAMESPACE::strspn("aaaa", "aa"), size_t{4});
}
+TEST(LlvmLibcStrSpnTest, TopBitSet) {
+ EXPECT_EQ(LIBC_NAMESPACE::strspn("hello\x80world", "helo\x80rld"), size_t{6});
+}
+
#if defined(LIBC_ADD_NULL_CHECKS)
TEST(LlvmLibcStrSpnTest, CrashOnNullPtr) {
diff --git a/libc/test/src/string/strtok_r_test.cpp b/libc/test/src/string/strtok_r_test.cpp
index a19390d0b0c2d..8c4d3c362f778 100644
--- a/libc/test/src/string/strtok_r_test.cpp
+++ b/libc/test/src/string/strtok_r_test.cpp
@@ -131,3 +131,11 @@ TEST(LlvmLibcStrTokReentrantTest, SubsequentSearchesReturnNull) {
ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
}
+
+TEST(LlvmLibcStrTokReentrantTest, TopBitSet) {
+ char top_bit_set_str[] = "hello\x80world";
+ char *p;
+ ASSERT_STREQ(LIBC_NAMESPACE::strtok_r(top_bit_set_str, "\x80", &p), "hello");
+ ASSERT_STREQ(LIBC_NAMESPACE::strtok_r(nullptr, "\x80", &p), "world");
+ ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, "\x80", &p), nullptr);
+}
diff --git a/libc/test/src/string/strtok_test.cpp b/libc/test/src/string/strtok_test.cpp
index 76efeddda6f4a..3c097fdee0713 100644
--- a/libc/test/src/string/strtok_test.cpp
+++ b/libc/test/src/string/strtok_test.cpp
@@ -83,3 +83,10 @@ TEST(LlvmLibcStrTokTest, SubsequentSearchesReturnNull) {
ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
}
+
+TEST(LlvmLibcStrTokTest, TopBitSet) {
+ char top_bit_set_str[] = "hello\x80world";
+ ASSERT_STREQ(LIBC_NAMESPACE::strtok(top_bit_set_str, "\x80"), "hello");
+ ASSERT_STREQ(LIBC_NAMESPACE::strtok(nullptr, "\x80"), "world");
+ ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, "\x80"), nullptr);
+}
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index b2cd3fdd468af..acfd0d96a28bf 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -5251,6 +5251,16 @@ libc_function(
],
)
+libc_function(
+ name = "strtok_r",
+ srcs = ["src/string/strtok_r.cpp"],
+ hdrs = ["src/string/strtok_r.h"],
+ deps = [
+ ":__support_common",
+ ":string_utils",
+ ],
+)
+
################################ fcntl targets #################################
libc_function(
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
index d90992417a721..1a95dece8bf20 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
@@ -59,6 +59,14 @@ libc_test(
],
)
+libc_test(
+ name = "strpbrk_test",
+ srcs = ["strpbrk_test.cpp"],
+ deps = [
+ "//libc:strpbrk",
+ ],
+)
+
libc_test(
name = "strsep_test",
srcs = ["strsep_test.cpp"],
@@ -127,6 +135,14 @@ libc_test(
],
)
+libc_test(
+ name = "strtok_r_test",
+ srcs = ["strtok_r_test.cpp"],
+ deps = [
+ "//libc:strtok_r",
+ ],
+)
+
libc_test_library(
name = "memory_check_utils",
hdrs = ["memory_utils/memory_check_utils.h"],
More information about the libc-commits mailing list