[libc-commits] [libc] [libc] Add fuzzers for `memcpy` and `memset` (PR #90591)
Guillaume Chatelet via libc-commits
libc-commits at lists.llvm.org
Tue Apr 30 04:11:05 PDT 2024
https://github.com/gchatelet updated https://github.com/llvm/llvm-project/pull/90591
>From 83ab25f7971a008ea9bfdae0f01a8dd141caab6a Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Tue, 30 Apr 2024 10:55:58 +0000
Subject: [PATCH 1/4] [libc] Add fuzzers for `memcpy` and `memset`
---
libc/fuzzing/string/CMakeLists.txt | 16 ++++++
libc/fuzzing/string/memcpy_fuzz.cpp | 55 ++++++++++++++++++++
libc/fuzzing/string/memset_fuzz.cpp | 44 ++++++++++++++++
libc/fuzzing/string/protected_pages.h | 74 +++++++++++++++++++++++++++
4 files changed, 189 insertions(+)
create mode 100644 libc/fuzzing/string/memcpy_fuzz.cpp
create mode 100644 libc/fuzzing/string/memset_fuzz.cpp
create mode 100644 libc/fuzzing/string/protected_pages.h
diff --git a/libc/fuzzing/string/CMakeLists.txt b/libc/fuzzing/string/CMakeLists.txt
index 9dd4fceee3b596..6147ebfcb5013f 100644
--- a/libc/fuzzing/string/CMakeLists.txt
+++ b/libc/fuzzing/string/CMakeLists.txt
@@ -25,6 +25,22 @@ add_libc_fuzzer(
libc.src.string.strlen
)
+add_libc_fuzzer(
+ memcpy_fuzz
+ SRCS
+ memcpy_fuzz.cpp
+ DEPENDS
+ libc.src.string.memcpy
+)
+
+add_libc_fuzzer(
+ memset_fuzz
+ SRCS
+ memset_fuzz.cpp
+ DEPENDS
+ libc.src.string.memset
+)
+
add_libc_fuzzer(
memcmp_fuzz
SRCS
diff --git a/libc/fuzzing/string/memcpy_fuzz.cpp b/libc/fuzzing/string/memcpy_fuzz.cpp
new file mode 100644
index 00000000000000..f9d98613cd7e23
--- /dev/null
+++ b/libc/fuzzing/string/memcpy_fuzz.cpp
@@ -0,0 +1,55 @@
+//===-- memcpy_fuzz.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Fuzzing test for llvm-libc memcpy implementation.
+///
+//===----------------------------------------------------------------------===//
+#include "protected_pages.h"
+#include "src/string/memcpy.h"
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t
+#include <stdlib.h> // rand
+
+// libFuzzer entry point for LIBC_NAMESPACE::memcpy. 'data' must hold exactly
+// sizeof(uint16_t) bytes, which are used as the number of bytes to copy;
+// inputs of any other length, or sizes rejected by the range check below, are
+// ignored. Traps when a copied byte differs from its source. Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
+ static constexpr size_t MAX_SIZE = 1024;
+ // Function-local statics: the pages are set up on the first call and reused
+ // by every later call.
+ static ProtectedPages regions;
+ static const Page write_buffer = regions.GetPageA().WithAccess(PROT_WRITE);
+ static const Page read_buffer = [&]() {
+ // We fetch page B in write mode.
+ auto region = regions.GetPageB().WithAccess(PROT_WRITE);
+ // And fill it with random numbers.
+ for (size_t i = 0; i < region.page_size; ++i)
+ region.page_ptr[i] = rand();
+ // Then return it in read mode.
+ return region.WithAccess(PROT_READ);
+ }();
+ // We fill 'size' with data coming from lib_fuzzer, this limits exploration to
+ // 2 bytes.
+ uint16_t size = 0;
+ // Inputs that are not exactly sizeof(size) bytes long are skipped.
+ if (data_size != sizeof(size))
+ return 0;
+ __builtin_memcpy(&size, data, sizeof(size));
+ // Only sizes below both MAX_SIZE and the page size are exercised.
+ if (size >= MAX_SIZE || size >= GetPageSize())
+ return 0;
+ // We cross-check the function from two sources and two destinations.
+ // The first of them (bottom) is always page aligned.
+ // The second one (top) is not necessarily aligned.
+ // Both sources and destinations are checked for out of bound accesses.
+ const uint8_t *sources[2] = {read_buffer.bottom(size), read_buffer.top(size)};
+ uint8_t *destinations[2] = {write_buffer.bottom(size),
+ write_buffer.top(size)};
+ // Copy for each of the four (source, destination) pairs, then compare the
+ // destination with the source byte by byte.
+ for (const uint8_t *src : sources) {
+ for (uint8_t *dst : destinations) {
+ LIBC_NAMESPACE::memcpy(dst, src, size);
+ for (size_t i = 0; i < size; ++i)
+ if (src[i] != dst[i])
+ __builtin_trap();
+ }
+ }
+ return 0;
+}
diff --git a/libc/fuzzing/string/memset_fuzz.cpp b/libc/fuzzing/string/memset_fuzz.cpp
new file mode 100644
index 00000000000000..445bb7b6fa2d6b
--- /dev/null
+++ b/libc/fuzzing/string/memset_fuzz.cpp
@@ -0,0 +1,44 @@
+//===-- memset_fuzz.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Fuzzing test for llvm-libc memset implementation.
+///
+//===----------------------------------------------------------------------===//
+#include "protected_pages.h"
+#include "src/string/memset.h"
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t
+
+// libFuzzer entry point for LIBC_NAMESPACE::memset. 'data' must hold exactly
+// sizeof(uint16_t) + sizeof(uint8_t) bytes: first the number of bytes to set,
+// then the fill byte. Inputs of any other length, or sizes rejected by the
+// range check below, are ignored. Only destination buffers are exercised
+// here. Traps when a written byte differs from the fill byte. Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
+ static constexpr size_t MAX_SIZE = 1024;
+ // Function-local statics: the pages are set up on the first call and reused
+ // by every later call.
+ static ProtectedPages regions;
+ static const Page write_buffer = regions.GetPageA().WithAccess(PROT_WRITE);
+ // We fill 'size' with data coming from lib_fuzzer, this limits exploration to
+ // 2 bytes.
+ uint16_t size = 0;
+ uint8_t fill_char = 0;
+ // Inputs that do not hold exactly one 'size' and one 'fill_char' are skipped.
+ if (data_size != sizeof(size) + sizeof(fill_char))
+ return 0;
+ __builtin_memcpy(&size, data, sizeof(size));
+ __builtin_memcpy(&fill_char, data + sizeof(size), sizeof(fill_char));
+ // Only sizes below both MAX_SIZE and the page size are exercised.
+ if (size >= MAX_SIZE || size >= GetPageSize())
+ return 0;
+ // We cross-check the function from two sources and two destinations.
+ // The first of them (bottom) is always page aligned.
+ // The second one (top) is not necessarily aligned.
+ // Both sources and destinations are checked for out of bound accesses.
+ uint8_t *destinations[2] = {write_buffer.bottom(size),
+ write_buffer.top(size)};
+ // Fill each destination, then check every byte against 'fill_char'.
+ for (uint8_t *dst : destinations) {
+ LIBC_NAMESPACE::memset(dst, fill_char, size);
+ for (size_t i = 0; i < size; ++i)
+ if (dst[i] != fill_char)
+ __builtin_trap();
+ }
+ return 0;
+}
diff --git a/libc/fuzzing/string/protected_pages.h b/libc/fuzzing/string/protected_pages.h
new file mode 100644
index 00000000000000..fa09f9ac7947f1
--- /dev/null
+++ b/libc/fuzzing/string/protected_pages.h
@@ -0,0 +1,74 @@
+#ifndef LIBC_FUZZING_STRING_PROTECTED_PAGES_H
+#define LIBC_FUZZING_STRING_PROTECTED_PAGES_H
+
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t
+#include <sys/mman.h> // mmap, munmap
+#include <unistd.h> // sysconf, _SC_PAGESIZE
+
+// Returns the mmap page size in bytes, as reported by sysconf(_SC_PAGESIZE).
+// Traps if the query fails instead of converting the error value to a huge
+// size. Declared 'inline' because this header defines the function and may be
+// included from more than one translation unit.
+inline size_t GetPageSize() {
+  const long page_size = sysconf(_SC_PAGESIZE);
+  if (page_size <= 0)
+    __builtin_trap();
+  return static_cast<size_t>(page_size);
+}
+
+// Represents a page of memory whose access can be configured through the
+// 'WithAccess' function. Accessing data above or below this page will trap as
+// it is sandwiched between two pages with no read / write access.
+struct Page {
+  // Returns an aligned pointer to the start of the page, for a buffer of
+  // 'size' bytes. Accessing data at ptr[-1] will fault. Traps if 'size' is
+  // not strictly smaller than the page size.
+  uint8_t *bottom(size_t size) const {
+    if (size >= page_size)
+      __builtin_trap();
+    return page_ptr;
+  }
+  // Returns a pointer to the last 'size' bytes of the page. Accessing data at
+  // ptr[size] will fault. Traps if 'size' exceeds the page size, since the
+  // result would otherwise point below the start of the page.
+  uint8_t *top(size_t size) const {
+    if (size > page_size)
+      __builtin_trap();
+    return page_ptr + page_size - size;
+  }
+
+  // Applies 'protection' (a PROT_* mask) to the page through mprotect and
+  // returns this page so calls can be chained. Traps if mprotect fails.
+  Page &WithAccess(int protection) {
+    if (mprotect(page_ptr, page_size, protection) != 0)
+      __builtin_trap();
+    return *this;
+  }
+
+  const size_t page_size;  // Size of the page in bytes.
+  uint8_t *const page_ptr; // Address of the first byte of the page.
+};
+
+// Allocates 5 consecutive pages that will trap if accessed.
+// +-----------------+
+// | page 0 (FAULT) |
+// | page 1 (CUSTOM) |
+// | page 2 (FAULT) |
+// | page 3 (CUSTOM) |
+// | page 4 (FAULT) |
+// +-----------------+
+// The pages 1 and 3 can be retrieved with 'GetPageA' / 'GetPageB' and their
+// accesses can be customized through the 'WithAccess' function.
+struct ProtectedPages {
+  static constexpr size_t PAGES = 5;
+
+  // Maps PAGES anonymous private pages with PROT_NONE. Traps if mmap fails.
+  ProtectedPages()
+      : page_size(GetPageSize()),
+        ptr(mmap(/*address*/ nullptr, /*length*/ PAGES * page_size,
+                 /*protection*/ PROT_NONE,
+                 /*flags*/ MAP_PRIVATE | MAP_ANONYMOUS, /*fd*/ -1,
+                 /*offset*/ 0)) {
+    // mmap signals failure by returning MAP_FAILED.
+    if (ptr == MAP_FAILED)
+      __builtin_trap();
+  }
+  // Releases the whole mapping.
+  ~ProtectedPages() { munmap(ptr, PAGES * page_size); }
+
+  // This object owns the mapping: a copy would unmap it a second time.
+  ProtectedPages(const ProtectedPages &) = delete;
+  ProtectedPages &operator=(const ProtectedPages &) = delete;
+
+  // Returns page 1 of the mapping.
+  Page GetPageA() const { return Page{page_size, page<1>()}; }
+  // Returns page 3 of the mapping.
+  Page GetPageB() const { return Page{page_size, page<3>()}; }
+
+private:
+  // Returns the address of the page at position 'index' in the mapping.
+  template <size_t index> uint8_t *page() const {
+    static_assert(index < PAGES);
+    return static_cast<uint8_t *>(ptr) + (index * page_size);
+  }
+
+  // Must stay declared before 'ptr': members are initialized in declaration
+  // order and the constructor uses 'page_size' to compute the mmap length.
+  const size_t page_size;
+  void *const ptr = nullptr;
+};
+
+#endif // LIBC_FUZZING_STRING_PROTECTED_PAGES_H
>From 768448b35a009af0ec2987b414af438cd72f9196 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Tue, 30 Apr 2024 11:00:11 +0000
Subject: [PATCH 2/4] rename `regions` to `pages`
---
libc/fuzzing/string/memcpy_fuzz.cpp | 12 ++++++------
libc/fuzzing/string/memset_fuzz.cpp | 4 ++--
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/libc/fuzzing/string/memcpy_fuzz.cpp b/libc/fuzzing/string/memcpy_fuzz.cpp
index f9d98613cd7e23..fea6eb8909bb6e 100644
--- a/libc/fuzzing/string/memcpy_fuzz.cpp
+++ b/libc/fuzzing/string/memcpy_fuzz.cpp
@@ -17,16 +17,16 @@
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
static constexpr size_t MAX_SIZE = 1024;
- static ProtectedPages regions;
- static const Page write_buffer = regions.GetPageA().WithAccess(PROT_WRITE);
+ static ProtectedPages pages;
+ static const Page write_buffer = pages.GetPageA().WithAccess(PROT_WRITE);
static const Page read_buffer = [&]() {
// We fetch page B in write mode.
- auto region = regions.GetPageB().WithAccess(PROT_WRITE);
+ auto page = pages.GetPageB().WithAccess(PROT_WRITE);
// And fill it with random numbers.
- for (size_t i = 0; i < region.page_size; ++i)
- region.page_ptr[i] = rand();
+ for (size_t i = 0; i < page.page_size; ++i)
+ page.page_ptr[i] = rand();
// Then return it in read mode.
- return region.WithAccess(PROT_READ);
+ return page.WithAccess(PROT_READ);
}();
// We fill 'size' with data coming from lib_fuzzer, this limits exploration to
// 2 bytes.
diff --git a/libc/fuzzing/string/memset_fuzz.cpp b/libc/fuzzing/string/memset_fuzz.cpp
index 445bb7b6fa2d6b..af55841d3d449a 100644
--- a/libc/fuzzing/string/memset_fuzz.cpp
+++ b/libc/fuzzing/string/memset_fuzz.cpp
@@ -16,8 +16,8 @@
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
static constexpr size_t MAX_SIZE = 1024;
- static ProtectedPages regions;
- static const Page write_buffer = regions.GetPageA().WithAccess(PROT_WRITE);
+ static ProtectedPages pages;
+ static const Page write_buffer = pages.GetPageA().WithAccess(PROT_WRITE);
// We fill 'size' with data coming from lib_fuzzer, this limits exploration to
// 2 bytes.
uint16_t size = 0;
>From 8fa3e161e7db4c0195e82e43dea08e557bde8e50 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Tue, 30 Apr 2024 11:05:50 +0000
Subject: [PATCH 3/4] Rephrase comments
---
libc/fuzzing/string/memcpy_fuzz.cpp | 9 +++++----
libc/fuzzing/string/memset_fuzz.cpp | 13 +++++++------
2 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/libc/fuzzing/string/memcpy_fuzz.cpp b/libc/fuzzing/string/memcpy_fuzz.cpp
index fea6eb8909bb6e..b22f55e07f0d0c 100644
--- a/libc/fuzzing/string/memcpy_fuzz.cpp
+++ b/libc/fuzzing/string/memcpy_fuzz.cpp
@@ -36,10 +36,11 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
__builtin_memcpy(&size, data, sizeof(size));
if (size >= MAX_SIZE || size >= GetPageSize())
return 0;
- // We cross-check the function from two sources and two destinations.
- // The first of them (bottom) is always page aligned.
- // The second one (top) is not necessarily aligned.
- // Both sources and destinations are checked for out of bound accesses.
+ // We cross-check the function with two sources and two destinations.
+ // - The first of them (bottom) is always page aligned and faults when
+ // accessing bytes before it.
+ // - The second one (top) is not necessarily aligned and faults when
+ // accessing bytes after it.
const uint8_t *sources[2] = {read_buffer.bottom(size), read_buffer.top(size)};
uint8_t *destinations[2] = {write_buffer.bottom(size),
write_buffer.top(size)};
diff --git a/libc/fuzzing/string/memset_fuzz.cpp b/libc/fuzzing/string/memset_fuzz.cpp
index af55841d3d449a..7bcadf8fd3bc84 100644
--- a/libc/fuzzing/string/memset_fuzz.cpp
+++ b/libc/fuzzing/string/memset_fuzz.cpp
@@ -18,8 +18,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
static constexpr size_t MAX_SIZE = 1024;
static ProtectedPages pages;
static const Page write_buffer = pages.GetPageA().WithAccess(PROT_WRITE);
- // We fill 'size' with data coming from lib_fuzzer, this limits exploration to
- // 2 bytes.
+ // We fill 'size' and 'fill_char' with data coming from lib_fuzzer, this
+ // limits exploration to 3 bytes.
uint16_t size = 0;
uint8_t fill_char = 0;
if (data_size != sizeof(size) + sizeof(fill_char))
@@ -28,10 +28,11 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
__builtin_memcpy(&fill_char, data + sizeof(size), sizeof(fill_char));
if (size >= MAX_SIZE || size >= GetPageSize())
return 0;
- // We cross-check the function from two sources and two destinations.
- // The first of them (bottom) is always page aligned.
- // The second one (top) is not necessarily aligned.
- // Both sources and destinations are checked for out of bound accesses.
+ // We cross-check the function with two destinations.
+ // - The first of them (bottom) is always page aligned and faults when
+ // accessing bytes before it.
+ // - The second one (top) is not necessarily aligned and faults when accessing
+ // bytes after it.
uint8_t *destinations[2] = {write_buffer.bottom(size),
write_buffer.top(size)};
for (uint8_t *dst : destinations) {
>From 6c9e5e1099182ef3b58f9d98606ee992a30c37fb Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Tue, 30 Apr 2024 11:10:49 +0000
Subject: [PATCH 4/4] Update protected_pages header and documentation
---
libc/fuzzing/string/protected_pages.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/libc/fuzzing/string/protected_pages.h b/libc/fuzzing/string/protected_pages.h
index fa09f9ac7947f1..67b88074b4f929 100644
--- a/libc/fuzzing/string/protected_pages.h
+++ b/libc/fuzzing/string/protected_pages.h
@@ -1,3 +1,15 @@
+//===-- protected_pages.h -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file provides protected pages that fault on accesses before or past
+// them. This is useful to check memory functions that must not access outside
+// of the provided size limited buffer.
+//===----------------------------------------------------------------------===//
+
#ifndef LIBC_FUZZING_STRING_PROTECTED_PAGES_H
#define LIBC_FUZZING_STRING_PROTECTED_PAGES_H
More information about the libc-commits
mailing list