[libc-commits] [libc] [libc] Fix stale char_ptr for find_first_character_wide read (PR #166594)

via libc-commits libc-commits at lists.llvm.org
Wed Nov 5 12:54:26 PST 2025


https://github.com/Sterling-Augustine updated https://github.com/llvm/llvm-project/pull/166594

>From 2c3cac28ab48576def27c32d75ca70fc96c81784 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Wed, 5 Nov 2025 08:53:33 -0800
Subject: [PATCH 1/3] Fix stale char_ptr for find_first_character_wide read

On exit from the loop, char_ptr had not been updated to match
block_ptr, resulting in erroneous results. Moving all updates out of
the loop fixes that.
---
 libc/src/string/string_utils.h       | 10 +++++-----
 libc/test/src/string/memchr_test.cpp |  5 +++++
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 7feef56fb3676..c9a720bef98a0 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -136,11 +136,11 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch,
   const Word ch_mask = repeat_byte<Word>(ch);
 
   // Step 2: read blocks
-  for (const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr);
-       !has_zeroes<Word>((*block_ptr) ^ ch_mask) && cur < n;
-       ++block_ptr, cur += sizeof(Word)) {
-    char_ptr = reinterpret_cast<const unsigned char *>(block_ptr);
-  }
+  const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr);
+  for (; !has_zeroes<Word>((*block_ptr) ^ ch_mask) && cur < n;
+       ++block_ptr, cur += sizeof(Word))
+    ;
+  char_ptr = reinterpret_cast<const unsigned char *>(block_ptr);
 
   // Step 3: find the match in the block
   for (; *char_ptr != ch && cur < n; ++char_ptr, ++cur) {
diff --git a/libc/test/src/string/memchr_test.cpp b/libc/test/src/string/memchr_test.cpp
index ede841118fe03..1db5ecaed40cd 100644
--- a/libc/test/src/string/memchr_test.cpp
+++ b/libc/test/src/string/memchr_test.cpp
@@ -21,6 +21,11 @@ const char *call_memchr(const void *src, int c, size_t size) {
   return reinterpret_cast<const char *>(LIBC_NAMESPACE::memchr(src, c, size));
 }
 
+TEST(LlvmLibcMemChrTest, FromProtoC) {
+  const char *src = "protobuf_cpp_version$\n";
+  ASSERT_STREQ(call_memchr(src, '$', 22), "$\n");
+}
+
 TEST(LlvmLibcMemChrTest, FindsCharacterAfterNullTerminator) {
   // memchr should continue searching after a null terminator.
   const size_t size = 5;

>From 1f8ef4b4e8f490ea1ee828d4ec529513002e51a0 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Wed, 5 Nov 2025 09:44:03 -0800
Subject: [PATCH 2/3] Improve test.

---
 libc/test/src/string/memchr_test.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libc/test/src/string/memchr_test.cpp b/libc/test/src/string/memchr_test.cpp
index 1db5ecaed40cd..a92c5fe80be98 100644
--- a/libc/test/src/string/memchr_test.cpp
+++ b/libc/test/src/string/memchr_test.cpp
@@ -21,8 +21,8 @@ const char *call_memchr(const void *src, int c, size_t size) {
   return reinterpret_cast<const char *>(LIBC_NAMESPACE::memchr(src, c, size));
 }
 
-TEST(LlvmLibcMemChrTest, FromProtoC) {
-  const char *src = "protobuf_cpp_version$\n";
+TEST(LlvmLibcMemChrTest, WideReadMultiIteration) {
+  const char *src = "abcdefghijklmnopqrst$\n";
   ASSERT_STREQ(call_memchr(src, '$', 22), "$\n");
 }
 

>From e55a8138f49f932c5547fbcd7594bc4267cf4147 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Wed, 5 Nov 2025 12:53:16 -0800
Subject: [PATCH 3/3] [libc] Do bounds-check before any dereference

---
 libc/src/string/string_utils.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index c9a720bef98a0..cbce62ead0328 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -127,8 +127,8 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch,
   size_t cur = 0;
 
   // Step 1: read 1 byte at a time to align to block size
-  for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0 && cur < n;
-       ++char_ptr, ++cur) {
+  for (; cur < n && reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0;
+       ++cur, ++char_ptr) {
     if (*char_ptr == ch)
       return const_cast<unsigned char *>(char_ptr);
   }
@@ -137,17 +137,17 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch,
 
   // Step 2: read blocks
   const Word *block_ptr = reinterpret_cast<const Word *>(char_ptr);
-  for (; !has_zeroes<Word>((*block_ptr) ^ ch_mask) && cur < n;
-       ++block_ptr, cur += sizeof(Word))
+  for (; cur < n && !has_zeroes<Word>((*block_ptr) ^ ch_mask);
+       cur += sizeof(Word), ++block_ptr)
     ;
   char_ptr = reinterpret_cast<const unsigned char *>(block_ptr);
 
   // Step 3: find the match in the block
-  for (; *char_ptr != ch && cur < n; ++char_ptr, ++cur) {
+  for (; cur < n && *char_ptr != ch; ++cur, ++char_ptr) {
     ;
   }
 
-  if (*char_ptr != ch || cur >= n)
+  if (cur >= n || *char_ptr != ch)
     return static_cast<void *>(nullptr);
 
   return const_cast<unsigned char *>(char_ptr);



More information about the libc-commits mailing list