[libc-commits] [libc] 7ff8a51 - [libc] Fix stale char_ptr for find_first_character_wide read (#166594)
via libc-commits
libc-commits at lists.llvm.org
Thu Nov 6 06:48:18 PST 2025
Author: Sterling-Augustine
Date: 2025-11-06T06:48:14-08:00
New Revision: 7ff8a5175428361e90c7bd7fd765192bec42be42
URL: https://github.com/llvm/llvm-project/commit/7ff8a5175428361e90c7bd7fd765192bec42be42
DIFF: https://github.com/llvm/llvm-project/commit/7ff8a5175428361e90c7bd7fd765192bec42be42.diff
LOG: [libc] Fix stale char_ptr for find_first_character_wide read (#166594)
On exit from the block-read loop, char_ptr had not been updated to match
block_ptr, so the final byte-wise search started from a stale position and
could produce incorrect results. Moving the char_ptr update out of the loop
fixes that.
Adjust dereferences to always be inside bounds checks.
Added:
Modified:
libc/src/string/string_utils.h
libc/test/src/string/memchr_test.cpp
Removed:
################################################################################
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 7feef56fb3676..cbce62ead0328 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -127,8 +127,8 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch,
size_t cur = 0;
// Step 1: read 1 byte at a time to align to block size
- for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0 && cur < n;
- ++char_ptr, ++cur) {
+ for (; cur < n && reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0;
+ ++cur, ++char_ptr) {
if (*char_ptr == ch)
return const_cast<unsigned char *>(char_ptr);
}
@@ -136,18 +136,18 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch,
const Word ch_mask = repeat_byte<Word>(ch);
// Step 2: read blocks
- for (const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr);
- !has_zeroes<Word>((*block_ptr) ^ ch_mask) && cur < n;
- ++block_ptr, cur += sizeof(Word)) {
- char_ptr = reinterpret_cast<const unsigned char *>(block_ptr);
- }
+ const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr);
+ for (; cur < n && !has_zeroes<Word>((*block_ptr) ^ ch_mask);
+ cur += sizeof(Word), ++block_ptr)
+ ;
+ char_ptr = reinterpret_cast<const unsigned char *>(block_ptr);
// Step 3: find the match in the block
- for (; *char_ptr != ch && cur < n; ++char_ptr, ++cur) {
+ for (; cur < n && *char_ptr != ch; ++cur, ++char_ptr) {
;
}
- if (*char_ptr != ch || cur >= n)
+ if (cur >= n || *char_ptr != ch)
return static_cast<void *>(nullptr);
return const_cast<unsigned char *>(char_ptr);
diff --git a/libc/test/src/string/memchr_test.cpp b/libc/test/src/string/memchr_test.cpp
index ede841118fe03..a92c5fe80be98 100644
--- a/libc/test/src/string/memchr_test.cpp
+++ b/libc/test/src/string/memchr_test.cpp
@@ -21,6 +21,11 @@ const char *call_memchr(const void *src, int c, size_t size) {
return reinterpret_cast<const char *>(LIBC_NAMESPACE::memchr(src, c, size));
}
+TEST(LlvmLibcMemChrTest, WideReadMultiIteration) {
+ const char *src = "abcdefghijklmnopqrst$\n";
+ ASSERT_STREQ(call_memchr(src, '$', 22), "$\n");
+}
+
TEST(LlvmLibcMemChrTest, FindsCharacterAfterNullTerminator) {
// memchr should continue searching after a null terminator.
const size_t size = 5;
More information about the libc-commits
mailing list