[libc-commits] [libc] 7ff8a51 - [libc] Fix stale char_ptr for find_first_character_wide read (#166594)

via libc-commits libc-commits at lists.llvm.org
Thu Nov 6 06:48:18 PST 2025


Author: Sterling-Augustine
Date: 2025-11-06T06:48:14-08:00
New Revision: 7ff8a5175428361e90c7bd7fd765192bec42be42

URL: https://github.com/llvm/llvm-project/commit/7ff8a5175428361e90c7bd7fd765192bec42be42
DIFF: https://github.com/llvm/llvm-project/commit/7ff8a5175428361e90c7bd7fd765192bec42be42.diff

LOG: [libc] Fix stale char_ptr for find_first_character_wide read (#166594)

On exit from the loop, char_ptr had not been updated to match block_ptr,
resulting in erroneous results. Moving all updates out of the loop fixes
that.

Adjust dereferences to always be inside bounds checks.

Added: 
    

Modified: 
    libc/src/string/string_utils.h
    libc/test/src/string/memchr_test.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 7feef56fb3676..cbce62ead0328 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -127,8 +127,8 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch,
   size_t cur = 0;
 
   // Step 1: read 1 byte at a time to align to block size
-  for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0 && cur < n;
-       ++char_ptr, ++cur) {
+  for (; cur < n && reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0;
+       ++cur, ++char_ptr) {
     if (*char_ptr == ch)
       return const_cast<unsigned char *>(char_ptr);
   }
@@ -136,18 +136,18 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch,
   const Word ch_mask = repeat_byte<Word>(ch);
 
   // Step 2: read blocks
-  for (const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr);
-       !has_zeroes<Word>((*block_ptr) ^ ch_mask) && cur < n;
-       ++block_ptr, cur += sizeof(Word)) {
-    char_ptr = reinterpret_cast<const unsigned char *>(block_ptr);
-  }
+  const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr);
+  for (; cur < n && !has_zeroes<Word>((*block_ptr) ^ ch_mask);
+       cur += sizeof(Word), ++block_ptr)
+    ;
+  char_ptr = reinterpret_cast<const unsigned char *>(block_ptr);
 
   // Step 3: find the match in the block
-  for (; *char_ptr != ch && cur < n; ++char_ptr, ++cur) {
+  for (; cur < n && *char_ptr != ch; ++cur, ++char_ptr) {
     ;
   }
 
-  if (*char_ptr != ch || cur >= n)
+  if (cur >= n || *char_ptr != ch)
     return static_cast<void *>(nullptr);
 
   return const_cast<unsigned char *>(char_ptr);

diff  --git a/libc/test/src/string/memchr_test.cpp b/libc/test/src/string/memchr_test.cpp
index ede841118fe03..a92c5fe80be98 100644
--- a/libc/test/src/string/memchr_test.cpp
+++ b/libc/test/src/string/memchr_test.cpp
@@ -21,6 +21,11 @@ const char *call_memchr(const void *src, int c, size_t size) {
   return reinterpret_cast<const char *>(LIBC_NAMESPACE::memchr(src, c, size));
 }
 
+TEST(LlvmLibcMemChrTest, WideReadMultiIteration) {
+  const char *src = "abcdefghijklmnopqrst$\n";
+  ASSERT_STREQ(call_memchr(src, '$', 22), "$\n");
+}
+
 TEST(LlvmLibcMemChrTest, FindsCharacterAfterNullTerminator) {
   // memchr should continue searching after a null terminator.
   const size_t size = 5;


        


More information about the libc-commits mailing list