[libc-commits] [libc] [libc] Fix stale char_ptr for find_first_character_wide read (PR #166594)
via libc-commits
libc-commits at lists.llvm.org
Wed Nov 5 12:54:26 PST 2025
https://github.com/Sterling-Augustine updated https://github.com/llvm/llvm-project/pull/166594
>From 2c3cac28ab48576def27c32d75ca70fc96c81784 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Wed, 5 Nov 2025 08:53:33 -0800
Subject: [PATCH 1/3] Fix stale char_ptr for find_first_character_wide read
On exit from the loop, char_ptr had not been updated to match
block_ptr, resulting in erroneous results. Moving all updates out of
the loop fixes that.
---
libc/src/string/string_utils.h | 10 +++++-----
libc/test/src/string/memchr_test.cpp | 5 +++++
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 7feef56fb3676..c9a720bef98a0 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -136,11 +136,11 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch,
const Word ch_mask = repeat_byte<Word>(ch);
// Step 2: read blocks
- for (const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr);
- !has_zeroes<Word>((*block_ptr) ^ ch_mask) && cur < n;
- ++block_ptr, cur += sizeof(Word)) {
- char_ptr = reinterpret_cast<const unsigned char *>(block_ptr);
- }
+ const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr);
+ for (; !has_zeroes<Word>((*block_ptr) ^ ch_mask) && cur < n;
+ ++block_ptr, cur += sizeof(Word))
+ ;
+ char_ptr = reinterpret_cast<const unsigned char *>(block_ptr);
// Step 3: find the match in the block
for (; *char_ptr != ch && cur < n; ++char_ptr, ++cur) {
diff --git a/libc/test/src/string/memchr_test.cpp b/libc/test/src/string/memchr_test.cpp
index ede841118fe03..1db5ecaed40cd 100644
--- a/libc/test/src/string/memchr_test.cpp
+++ b/libc/test/src/string/memchr_test.cpp
@@ -21,6 +21,11 @@ const char *call_memchr(const void *src, int c, size_t size) {
return reinterpret_cast<const char *>(LIBC_NAMESPACE::memchr(src, c, size));
}
+TEST(LlvmLibcMemChrTest, FromProtoC) {
+ const char *src = "protobuf_cpp_version$\n";
+ ASSERT_STREQ(call_memchr(src, '$', 22), "$\n");
+}
+
TEST(LlvmLibcMemChrTest, FindsCharacterAfterNullTerminator) {
// memchr should continue searching after a null terminator.
const size_t size = 5;
>From 1f8ef4b4e8f490ea1ee828d4ec529513002e51a0 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Wed, 5 Nov 2025 09:44:03 -0800
Subject: [PATCH 2/3] Improve test.
---
libc/test/src/string/memchr_test.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libc/test/src/string/memchr_test.cpp b/libc/test/src/string/memchr_test.cpp
index 1db5ecaed40cd..a92c5fe80be98 100644
--- a/libc/test/src/string/memchr_test.cpp
+++ b/libc/test/src/string/memchr_test.cpp
@@ -21,8 +21,8 @@ const char *call_memchr(const void *src, int c, size_t size) {
return reinterpret_cast<const char *>(LIBC_NAMESPACE::memchr(src, c, size));
}
-TEST(LlvmLibcMemChrTest, FromProtoC) {
- const char *src = "protobuf_cpp_version$\n";
+TEST(LlvmLibcMemChrTest, WideReadMultiIteration) {
+ const char *src = "abcdefghijklmnopqrst$\n";
ASSERT_STREQ(call_memchr(src, '$', 22), "$\n");
}
>From e55a8138f49f932c5547fbcd7594bc4267cf4147 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Wed, 5 Nov 2025 12:53:16 -0800
Subject: [PATCH 3/3] [libc] Do bounds-check before any dereference
---
libc/src/string/string_utils.h | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index c9a720bef98a0..cbce62ead0328 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -127,8 +127,8 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch,
size_t cur = 0;
// Step 1: read 1 byte at a time to align to block size
- for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0 && cur < n;
- ++char_ptr, ++cur) {
+ for (; cur < n && reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0;
+ ++cur, ++char_ptr) {
if (*char_ptr == ch)
return const_cast<unsigned char *>(char_ptr);
}
@@ -137,17 +137,17 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch,
// Step 2: read blocks
const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr);
- for (; !has_zeroes<Word>((*block_ptr) ^ ch_mask) && cur < n;
- ++block_ptr, cur += sizeof(Word))
+ for (; cur < n && !has_zeroes<Word>((*block_ptr) ^ ch_mask);
+ cur += sizeof(Word), ++block_ptr)
;
char_ptr = reinterpret_cast<const unsigned char *>(block_ptr);
// Step 3: find the match in the block
- for (; *char_ptr != ch && cur < n; ++char_ptr, ++cur) {
+ for (; cur < n && *char_ptr != ch; ++cur, ++char_ptr) {
;
}
- if (*char_ptr != ch || cur >= n)
+ if (cur >= n || *char_ptr != ch)
return static_cast<void *>(nullptr);
return const_cast<unsigned char *>(char_ptr);
More information about the libc-commits
mailing list