[libc-commits] [libc] Create a poor-developer's msan for libc wide read functions. (PR #170586)
via libc-commits
libc-commits at lists.llvm.org
Wed Jan 14 09:59:27 PST 2026
https://github.com/Sterling-Augustine updated https://github.com/llvm/llvm-project/pull/170586
>From 33b3c44babbb8c888864f1be6b824b624d3370f4 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Wed, 3 Dec 2025 16:33:47 -0800
Subject: [PATCH 1/5] Create a poor-developer's msan for libc wide read
functions.
Most libcs optimize functions like strlen by reading in chunks larger
than a single character. As part of "the implementation", they can
legally do this as long as they are careful not to read invalid
memory.
However, such tricks prevent those functions from being tested under
the various sanitizers.
This PR creates a test framework that can report when one of these
functions reads or writes in an invalid way without using the
sanitizers.
---
libc/test/src/strings/CMakeLists.txt | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/libc/test/src/strings/CMakeLists.txt b/libc/test/src/strings/CMakeLists.txt
index 5f70dc024f6ce..0ccd0dc302943 100644
--- a/libc/test/src/strings/CMakeLists.txt
+++ b/libc/test/src/strings/CMakeLists.txt
@@ -110,5 +110,15 @@ add_libc_test(
libc.src.strings.strncasecmp_l
)
+add_libc_test(
+ wide_read_memory_test
+ SUITE
+ libc-strings-tests
+ SRCS
+ wide_read_memory_test.cpp
+ DEPENDS
+ libc.src.string.strlen
+)
+
add_libc_multi_impl_test(bcmp libc-strings-tests SRCS bcmp_test.cpp)
add_libc_multi_impl_test(bzero libc-strings-tests SRCS bzero_test.cpp)
>From 498391a2406b4cdda0df9093fe85151c4813ff97 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Thu, 4 Dec 2025 09:47:32 -0800
Subject: [PATCH 2/5] Add missing file.
---
.../src/strings/wide_read_memory_test.cpp | 98 +++++++++++++++++++
1 file changed, 98 insertions(+)
create mode 100644 libc/test/src/strings/wide_read_memory_test.cpp
diff --git a/libc/test/src/strings/wide_read_memory_test.cpp b/libc/test/src/strings/wide_read_memory_test.cpp
new file mode 100644
index 0000000000000..e353d49dd864e
--- /dev/null
+++ b/libc/test/src/strings/wide_read_memory_test.cpp
@@ -0,0 +1,98 @@
+// For performance, some vector-based libc functions read data outside of, but
+// adjacent to, the input address. For example, string_length can read both
+// before and after the data in its src parameter. As part of the
+// implementation, it is allowed to do this. However, the code must take care
+// to avoid address errors. The sanitizers can't distinguish between "the
+// implementation" and user-code, and so report an error. Therefore we can't use
+// them to check if functions like these have memory errors.
+//
+// This test uses mprotect to simulate address sanitization. Tests that read too
+// far outside data will segfault.
+//
+// It creates three adjacent pages in memory. The outer two are mprotected
+// unreadable, the middle usable normally. By placing test data at the edges
+// between the middle page and the others, we can test for bad accesses.
+
+#include <cstddef>
+#include <type_traits>
+#include <vector>
+
+#include <assert.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "src/string/string_utils.h"
+#include "test/UnitTest/MemoryMatcher.h"
+#include "test/UnitTest/Test.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+
+class LlvmLibcWideAccessMemoryTest : public testing::Test {
+ char *page0_;
+ char *page1_;
+ char *page2_;
+ size_t page_size;
+
+ public:
+ void SetUp() override {
+ page_size = getpagesize();
+ page0_ =
+ static_cast<char *>(mmap(nullptr, page_size * 3, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+ ASSERT_NE(static_cast<void *>(page0_), MAP_FAILED);
+ page1_ = page0_ + page_size;
+ page2_ = page1_ + page_size;
+ mprotect(page0_, page_size, PROT_NONE);
+ mprotect(page2_, page_size, PROT_NONE);
+ }
+
+ void TearDown() override { munmap(page0_, page_size * 3); }
+
+ // So we don't depend on system memcpy, which may itself be under test.
+ void BasicMemCopy(char *dst, const char *src, size_t len) {
+ while (len--)
+ *dst++ = *src++;
+ }
+
+ // Repeatedly runs "func" on copies of the data in "buf", each progressively
+ // closer to the boundary of valid memory. Test will segfault if function
+ // under test examines invalid memory.
+ //
+ // Func should test the function in question just as normal. Recommend making
+ // the amount of data just over 1.5k, which guarantees a wind-up, multiple
+ // iterations of the inner loop, and a wind-down, even on systems with
+ // 512-byte arrays. The termination condition, eg, end-of string or character
+ // being searched for, should be near the end of the data.
+ template <typename TestFunc>
+ void TestMemoryAccess(const std::vector<char>& buf, TestFunc func) {
+ // Run func on data near the start boundary of valid memory.
+ for (unsigned long offset = 0;
+ offset < std::alignment_of<std::max_align_t>::value; ++offset) {
+ char *test_addr = page1_ + offset;
+ BasicMemCopy(test_addr, buf.data(), buf.size());
+ func(test_addr);
+ }
+ // Run func on data near the end boundary of valid memory.
+ for (unsigned long offset = 0;
+ offset < std::alignment_of<std::max_align_t>::value; ++offset) {
+ char *test_addr = page2_ - buf.size() - offset - 1;
+ assert(test_addr + buf.size() < page2_);
+ BasicMemCopy(test_addr, buf.data(), buf.size());
+ func(test_addr);
+ }
+ }
+};
+
+TEST_F(LlvmLibcWideAccessMemoryTest, StringLength) {
+ // 1.5 k long vector of a's.
+ std::vector<char> buf(1536, 'a');
+ // Make sure it is null terminated.
+ buf.push_back('\0');
+ this->TestMemoryAccess(buf, [this, buf](const char* test_data) {
+ // -1 for the null character.
+ ASSERT_EQ(internal::string_length(test_data), size_t(buf.size() - 1));
+ });
+}
+
+}
>From 54152258bdb25ef2da4d5fb5b1b1d26788f1367f Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Thu, 4 Dec 2025 10:35:37 -0800
Subject: [PATCH 3/5] Fix formatting.
---
libc/test/src/strings/wide_read_memory_test.cpp | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/libc/test/src/strings/wide_read_memory_test.cpp b/libc/test/src/strings/wide_read_memory_test.cpp
index e353d49dd864e..a7694dad570ab 100644
--- a/libc/test/src/strings/wide_read_memory_test.cpp
+++ b/libc/test/src/strings/wide_read_memory_test.cpp
@@ -27,14 +27,13 @@
namespace LIBC_NAMESPACE_DECL {
-
class LlvmLibcWideAccessMemoryTest : public testing::Test {
char *page0_;
char *page1_;
char *page2_;
size_t page_size;
- public:
+public:
void SetUp() override {
page_size = getpagesize();
page0_ =
@@ -57,15 +56,15 @@ class LlvmLibcWideAccessMemoryTest : public testing::Test {
// Repeatedly runs "func" on copies of the data in "buf", each progressively
// closer to the boundary of valid memory. Test will segfault if function
- // under test examines invalid memory.
+ // under test accesses invalid memory.
//
// Func should test the function in question just as normal. Recommend making
// the amount of data just over 1.5k, which guarantees a wind-up, multiple
// iterations of the inner loop, and a wind-down, even on systems with
- // 512-byte arrays. The termination condition, eg, end-of string or character
+ // 512-byte vectors. The termination condition, eg, end-of string or character
// being searched for, should be near the end of the data.
template <typename TestFunc>
- void TestMemoryAccess(const std::vector<char>& buf, TestFunc func) {
+ void TestMemoryAccess(const std::vector<char> &buf, TestFunc func) {
// Run func on data near the start boundary of valid memory.
for (unsigned long offset = 0;
offset < std::alignment_of<std::max_align_t>::value; ++offset) {
@@ -76,7 +75,7 @@ class LlvmLibcWideAccessMemoryTest : public testing::Test {
// Run func on data near the end boundary of valid memory.
for (unsigned long offset = 0;
offset < std::alignment_of<std::max_align_t>::value; ++offset) {
- char *test_addr = page2_ - buf.size() - offset - 1;
+ char *test_addr = page2_ - buf.size() - offset - 1;
assert(test_addr + buf.size() < page2_);
BasicMemCopy(test_addr, buf.data(), buf.size());
func(test_addr);
@@ -89,10 +88,10 @@ TEST_F(LlvmLibcWideAccessMemoryTest, StringLength) {
std::vector<char> buf(1536, 'a');
// Make sure it is null terminated.
buf.push_back('\0');
- this->TestMemoryAccess(buf, [this, buf](const char* test_data) {
+ this->TestMemoryAccess(buf, [this, buf](const char *test_data) {
// -1 for the null character.
ASSERT_EQ(internal::string_length(test_data), size_t(buf.size() - 1));
});
}
-}
+} // namespace LIBC_NAMESPACE_DECL
>From 12cfaeff85bb5235d5524572e28a397deb1e24d0 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Tue, 16 Dec 2025 09:52:32 -0800
Subject: [PATCH 4/5] Address comments.
---
.../src/strings/wide_read_memory_test.cpp | 30 ++++++++-----------
1 file changed, 12 insertions(+), 18 deletions(-)
diff --git a/libc/test/src/strings/wide_read_memory_test.cpp b/libc/test/src/strings/wide_read_memory_test.cpp
index a7694dad570ab..f772d821417f8 100644
--- a/libc/test/src/strings/wide_read_memory_test.cpp
+++ b/libc/test/src/strings/wide_read_memory_test.cpp
@@ -13,14 +13,14 @@
// unreadable, the middle usable normally. By placing test data at the edges
// between the middle page and the others, we can test for bad accesses.
+#include <assert.h>
#include <cstddef>
#include <type_traits>
-#include <vector>
-
-#include <assert.h>
-#include <sys/mman.h>
-#include <unistd.h>
+#include "src/unistd/getpagesize.h"
+#include "src/sys/mman/mmap.h"
+#include "src/sys/mman/munmap.h"
+#include "src/sys/mman/mprotect.h"
#include "src/string/string_utils.h"
#include "test/UnitTest/MemoryMatcher.h"
#include "test/UnitTest/Test.h"
@@ -35,24 +35,18 @@ class LlvmLibcWideAccessMemoryTest : public testing::Test {
public:
void SetUp() override {
- page_size = getpagesize();
- page0_ =
- static_cast<char *>(mmap(nullptr, page_size * 3, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+ page_size = LIBC_NAMESPACE::getpagesize();
+ page0_ = static_cast<char *>(
+ LIBC_NAMESPACE::mmap(nullptr, page_size * 3, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
ASSERT_NE(static_cast<void *>(page0_), MAP_FAILED);
page1_ = page0_ + page_size;
page2_ = page1_ + page_size;
- mprotect(page0_, page_size, PROT_NONE);
- mprotect(page2_, page_size, PROT_NONE);
+ LIBC_NAMESPACE::mprotect(page0_, page_size, PROT_NONE);
+ LIBC_NAMESPACE::mprotect(page2_, page_size, PROT_NONE);
}
- void TearDown() override { munmap(page0_, page_size * 3); }
-
- // So we don't depend on system memcpy, which may itself be under test.
- void BasicMemCopy(char *dst, const char *src, size_t len) {
- while (len--)
- *dst++ = *src++;
- }
+ void TearDown() override { LIBC_NAMESPACE::munmap(page0_, page_size * 3); }
// Repeatedly runs "func" on copies of the data in "buf", each progressively
// closer to the boundary of valid memory. Test will segfault if function
>From 7faff32682f1918b700892c6e6743ebf21c2a870 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Wed, 14 Jan 2026 09:58:47 -0800
Subject: [PATCH 5/5] Fix include order. Use internal versions of functions.
---
libc/test/src/strings/CMakeLists.txt | 5 ++++
.../src/strings/wide_read_memory_test.cpp | 30 +++++++++++--------
2 files changed, 22 insertions(+), 13 deletions(-)
diff --git a/libc/test/src/strings/CMakeLists.txt b/libc/test/src/strings/CMakeLists.txt
index 0ccd0dc302943..e3d6ec1f134d8 100644
--- a/libc/test/src/strings/CMakeLists.txt
+++ b/libc/test/src/strings/CMakeLists.txt
@@ -118,6 +118,11 @@ add_libc_test(
wide_read_memory_test.cpp
DEPENDS
libc.src.string.strlen
+ libc.src.sys.mman.mmap
+ libc.src.sys.mman.mprotect
+ libc.src.sys.mman.munmap
+ libc.src.unistd.linux.getpagesize
+ libc.src.__support.CPP.array
)
add_libc_multi_impl_test(bcmp libc-strings-tests SRCS bcmp_test.cpp)
diff --git a/libc/test/src/strings/wide_read_memory_test.cpp b/libc/test/src/strings/wide_read_memory_test.cpp
index f772d821417f8..06d8289812303 100644
--- a/libc/test/src/strings/wide_read_memory_test.cpp
+++ b/libc/test/src/strings/wide_read_memory_test.cpp
@@ -13,20 +13,23 @@
// unreadable, the middle usable normally. By placing test data at the edges
// between the middle page and the others, we can test for bad accesses.
-#include <assert.h>
#include <cstddef>
#include <type_traits>
-#include "src/unistd/getpagesize.h"
+#include "src/__support/CPP/array.h"
+#include "src/string/memory_utils/inline_memset.h"
+#include "src/string/string_utils.h"
#include "src/sys/mman/mmap.h"
-#include "src/sys/mman/munmap.h"
#include "src/sys/mman/mprotect.h"
-#include "src/string/string_utils.h"
+#include "src/sys/mman/munmap.h"
+#include "src/unistd/getpagesize.h"
#include "test/UnitTest/MemoryMatcher.h"
#include "test/UnitTest/Test.h"
namespace LIBC_NAMESPACE_DECL {
+using TwoKilobyteBuffer = cpp::array<char, 2048>;
+
class LlvmLibcWideAccessMemoryTest : public testing::Test {
char *page0_;
char *page1_;
@@ -53,25 +56,25 @@ class LlvmLibcWideAccessMemoryTest : public testing::Test {
// under test accesses invalid memory.
//
// Func should test the function in question just as normal. Recommend making
- // the amount of data just over 1.5k, which guarantees a wind-up, multiple
+ // the amount of test data at least 1.5k, which guarantees a wind-up, multiple
// iterations of the inner loop, and a wind-down, even on systems with
// 512-byte vectors. The termination condition, eg, end-of string or character
// being searched for, should be near the end of the data.
template <typename TestFunc>
- void TestMemoryAccess(const std::vector<char> &buf, TestFunc func) {
+ void TestMemoryAccess(const TwoKilobyteBuffer &buf, TestFunc func) {
// Run func on data near the start boundary of valid memory.
for (unsigned long offset = 0;
- offset < std::alignment_of<std::max_align_t>::value; ++offset) {
+ offset < std::alignment_of<max_align_t>::value; ++offset) {
char *test_addr = page1_ + offset;
- BasicMemCopy(test_addr, buf.data(), buf.size());
+ inline_memcpy(test_addr, buf.data(), buf.size());
func(test_addr);
}
// Run func on data near the end boundary of valid memory.
for (unsigned long offset = 0;
- offset < std::alignment_of<std::max_align_t>::value; ++offset) {
+ offset < std::alignment_of<max_align_t>::value; ++offset) {
char *test_addr = page2_ - buf.size() - offset - 1;
- assert(test_addr + buf.size() < page2_);
- BasicMemCopy(test_addr, buf.data(), buf.size());
+ ASSERT_LE(test_addr + buf.size(), page2_);
+ inline_memcpy(test_addr, buf.data(), buf.size());
func(test_addr);
}
}
@@ -79,9 +82,10 @@ class LlvmLibcWideAccessMemoryTest : public testing::Test {
TEST_F(LlvmLibcWideAccessMemoryTest, StringLength) {
// 1.5 k long vector of a's.
- std::vector<char> buf(1536, 'a');
+ TwoKilobyteBuffer buf;
+ inline_memset(buf.data(), 'a', buf.size());
// Make sure it is null terminated.
- buf.push_back('\0');
+ buf[buf.size() - 1] = '\0';
this->TestMemoryAccess(buf, [this, buf](const char *test_data) {
// -1 for the null character.
ASSERT_EQ(internal::string_length(test_data), size_t(buf.size() - 1));
More information about the libc-commits
mailing list