[libc-commits] [libc] [libc] Add fuzzers for `memcpy` and `memset` (PR #90591)

Guillaume Chatelet via libc-commits libc-commits at lists.llvm.org
Tue May 14 02:48:07 PDT 2024


https://github.com/gchatelet updated https://github.com/llvm/llvm-project/pull/90591

>From 83ab25f7971a008ea9bfdae0f01a8dd141caab6a Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Tue, 30 Apr 2024 10:55:58 +0000
Subject: [PATCH 1/5] [libc] Add fuzzers for `memcpy` and `memset`

---
 libc/fuzzing/string/CMakeLists.txt    | 16 ++++++
 libc/fuzzing/string/memcpy_fuzz.cpp   | 55 ++++++++++++++++++++
 libc/fuzzing/string/memset_fuzz.cpp   | 44 ++++++++++++++++
 libc/fuzzing/string/protected_pages.h | 74 +++++++++++++++++++++++++++
 4 files changed, 189 insertions(+)
 create mode 100644 libc/fuzzing/string/memcpy_fuzz.cpp
 create mode 100644 libc/fuzzing/string/memset_fuzz.cpp
 create mode 100644 libc/fuzzing/string/protected_pages.h

diff --git a/libc/fuzzing/string/CMakeLists.txt b/libc/fuzzing/string/CMakeLists.txt
index 9dd4fceee3b59..6147ebfcb5013 100644
--- a/libc/fuzzing/string/CMakeLists.txt
+++ b/libc/fuzzing/string/CMakeLists.txt
@@ -25,6 +25,22 @@ add_libc_fuzzer(
     libc.src.string.strlen
 )
 
+add_libc_fuzzer(
+  memcpy_fuzz
+  SRCS
+    memcpy_fuzz.cpp
+  DEPENDS
+    libc.src.string.memcpy
+)
+
+add_libc_fuzzer(
+  memset_fuzz
+  SRCS
+    memset_fuzz.cpp
+  DEPENDS
+    libc.src.string.memset
+)
+
 add_libc_fuzzer(
   memcmp_fuzz
   SRCS
diff --git a/libc/fuzzing/string/memcpy_fuzz.cpp b/libc/fuzzing/string/memcpy_fuzz.cpp
new file mode 100644
index 0000000000000..f9d98613cd7e2
--- /dev/null
+++ b/libc/fuzzing/string/memcpy_fuzz.cpp
@@ -0,0 +1,55 @@
+//===-- memcpy_fuzz.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Fuzzing test for llvm-libc memcpy implementation.
+///
+//===----------------------------------------------------------------------===//
+#include "protected_pages.h"
+#include "src/string/memcpy.h"
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t
+#include <stdlib.h> // rand
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
+  static constexpr size_t MAX_SIZE = 1024;
+  static ProtectedPages regions;
+  static const Page write_buffer = regions.GetPageA().WithAccess(PROT_WRITE);
+  static const Page read_buffer = [&]() {
+    // We fetch page B in write mode.
+    auto region = regions.GetPageB().WithAccess(PROT_WRITE);
+    // And fill it with random numbers.
+    for (size_t i = 0; i < region.page_size; ++i)
+      region.page_ptr[i] = rand();
+    // Then return it in read mode.
+    return region.WithAccess(PROT_READ);
+  }();
+  // We fill 'size' with data coming from lib_fuzzer, this limits exploration to
+  // 2 bytes.
+  uint16_t size = 0;
+  if (data_size != sizeof(size))
+    return 0;
+  __builtin_memcpy(&size, data, sizeof(size));
+  if (size >= MAX_SIZE || size >= GetPageSize())
+    return 0;
+  // We cross-check the function from two sources and two destinations.
+  // The first of them (bottom) is always page aligned.
+  // The second one (top) is not necessarily aligned.
+  // Both sources and destinations are checked for out of bound accesses.
+  const uint8_t *sources[2] = {read_buffer.bottom(size), read_buffer.top(size)};
+  uint8_t *destinations[2] = {write_buffer.bottom(size),
+                              write_buffer.top(size)};
+  for (const uint8_t *src : sources) {
+    for (uint8_t *dst : destinations) {
+      LIBC_NAMESPACE::memcpy(dst, src, size);
+      for (size_t i = 0; i < size; ++i)
+        if (src[i] != dst[i])
+          __builtin_trap();
+    }
+  }
+  return 0;
+}
diff --git a/libc/fuzzing/string/memset_fuzz.cpp b/libc/fuzzing/string/memset_fuzz.cpp
new file mode 100644
index 0000000000000..445bb7b6fa2d6
--- /dev/null
+++ b/libc/fuzzing/string/memset_fuzz.cpp
@@ -0,0 +1,44 @@
+//===-- memset_fuzz.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Fuzzing test for llvm-libc memset implementation.
+///
+//===----------------------------------------------------------------------===//
+#include "protected_pages.h"
+#include "src/string/memset.h"
+#include <stddef.h> // size_t
+#include <stdint.h> // uint8_t
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
+  static constexpr size_t MAX_SIZE = 1024;
+  static ProtectedPages regions;
+  static const Page write_buffer = regions.GetPageA().WithAccess(PROT_WRITE);
+  // We fill 'size' with data coming from lib_fuzzer, this limits exploration to
+  // 2 bytes.
+  uint16_t size = 0;
+  uint8_t fill_char = 0;
+  if (data_size != sizeof(size) + sizeof(fill_char))
+    return 0;
+  __builtin_memcpy(&size, data, sizeof(size));
+  __builtin_memcpy(&fill_char, data + sizeof(size), sizeof(fill_char));
+  if (size >= MAX_SIZE || size >= GetPageSize())
+    return 0;
+  // We cross-check the function from two sources and two destinations.
+  // The first of them (bottom) is always page aligned.
+  // The second one (top) is not necessarily aligned.
+  // Both sources and destinations are checked for out of bound accesses.
+  uint8_t *destinations[2] = {write_buffer.bottom(size),
+                              write_buffer.top(size)};
+  for (uint8_t *dst : destinations) {
+    LIBC_NAMESPACE::memset(dst, fill_char, size);
+    for (size_t i = 0; i < size; ++i)
+      if (dst[i] != fill_char)
+        __builtin_trap();
+  }
+  return 0;
+}
diff --git a/libc/fuzzing/string/protected_pages.h b/libc/fuzzing/string/protected_pages.h
new file mode 100644
index 0000000000000..fa09f9ac7947f
--- /dev/null
+++ b/libc/fuzzing/string/protected_pages.h
@@ -0,0 +1,74 @@
+#ifndef LIBC_FUZZING_STRING_PROTECTED_PAGES_H
+#define LIBC_FUZZING_STRING_PROTECTED_PAGES_H
+
+#include <stddef.h>   // size_t
+#include <stdint.h>   // uint8_t
+#include <sys/mman.h> // mmap, munmap
+#include <unistd.h>   // sysconf, _SC_PAGESIZE
+
+// Returns mmap page size.
+size_t GetPageSize() { return sysconf(_SC_PAGESIZE); }
+
+// Represents a page of memory which access can be configured throught the
+// 'WithAccess' function. Accessing data above or below this page will trap as
+// it is sandwiched between two pages with no read / write access.
+struct Page {
+  // Returns an aligned pointer that can be accessed up to page_size. Accessing
+  // data at ptr[-1] will fault.
+  uint8_t *bottom(size_t size) const {
+    if (size >= page_size)
+      __builtin_trap();
+    return page_ptr;
+  }
+  // Returns a pointer to a buffer that can be accessed up to size. Accessing
+  // data at ptr[size] will fault.
+  uint8_t *top(size_t size) const { return page_ptr + page_size - size; }
+
+  Page &WithAccess(int protection) {
+    if (mprotect(page_ptr, page_size, protection) != 0)
+      __builtin_trap();
+    return *this;
+  }
+
+  const size_t page_size;
+  uint8_t *const page_ptr;
+};
+
+// Allocates 5 consecutive pages that will trap if accessed.
+// +-----------------+
+// | page 0 (FAULT)  |
+// | page 1 (CUSTOM) |
+// | page 2 (FAULT)  |
+// | page 3 (CUSTOM) |
+// | page 4 (FAULT)  |
+// +-----------------+
+// The pages 1 and 3 can be retrieved as with 'GetPageA' / 'GetPageB' and their
+// accesses can be customized through the 'WithAccess' function.
+struct ProtectedPages {
+  static constexpr size_t PAGES = 5;
+
+  ProtectedPages()
+      : page_size(GetPageSize()),
+        ptr(mmap(/*address*/ nullptr, /*length*/ PAGES * page_size,
+                 /*protection*/ PROT_NONE,
+                 /*flags*/ MAP_PRIVATE | MAP_ANONYMOUS, /*fd*/ -1,
+                 /*offset*/ 0)) {
+    if (reinterpret_cast<intptr_t>(ptr) == -1)
+      __builtin_trap();
+  }
+  ~ProtectedPages() { munmap(ptr, PAGES * page_size); }
+
+  Page GetPageA() const { return Page{page_size, page<1>()}; }
+  Page GetPageB() const { return Page{page_size, page<3>()}; }
+
+private:
+  template <size_t index> uint8_t *page() const {
+    static_assert(index < PAGES);
+    return static_cast<uint8_t *>(ptr) + (index * page_size);
+  }
+
+  const size_t page_size;
+  void *const ptr = nullptr;
+};
+
+#endif // LIBC_FUZZING_STRING_PROTECTED_PAGES_H

>From 768448b35a009af0ec2987b414af438cd72f9196 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Tue, 30 Apr 2024 11:00:11 +0000
Subject: [PATCH 2/5] rename `regions` to `pages`

---
 libc/fuzzing/string/memcpy_fuzz.cpp | 12 ++++++------
 libc/fuzzing/string/memset_fuzz.cpp |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/libc/fuzzing/string/memcpy_fuzz.cpp b/libc/fuzzing/string/memcpy_fuzz.cpp
index f9d98613cd7e2..fea6eb8909bb6 100644
--- a/libc/fuzzing/string/memcpy_fuzz.cpp
+++ b/libc/fuzzing/string/memcpy_fuzz.cpp
@@ -17,16 +17,16 @@
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
   static constexpr size_t MAX_SIZE = 1024;
-  static ProtectedPages regions;
-  static const Page write_buffer = regions.GetPageA().WithAccess(PROT_WRITE);
+  static ProtectedPages pages;
+  static const Page write_buffer = pages.GetPageA().WithAccess(PROT_WRITE);
   static const Page read_buffer = [&]() {
     // We fetch page B in write mode.
-    auto region = regions.GetPageB().WithAccess(PROT_WRITE);
+    auto page = pages.GetPageB().WithAccess(PROT_WRITE);
     // And fill it with random numbers.
-    for (size_t i = 0; i < region.page_size; ++i)
-      region.page_ptr[i] = rand();
+    for (size_t i = 0; i < page.page_size; ++i)
+      page.page_ptr[i] = rand();
     // Then return it in read mode.
-    return region.WithAccess(PROT_READ);
+    return page.WithAccess(PROT_READ);
   }();
   // We fill 'size' with data coming from lib_fuzzer, this limits exploration to
   // 2 bytes.
diff --git a/libc/fuzzing/string/memset_fuzz.cpp b/libc/fuzzing/string/memset_fuzz.cpp
index 445bb7b6fa2d6..af55841d3d449 100644
--- a/libc/fuzzing/string/memset_fuzz.cpp
+++ b/libc/fuzzing/string/memset_fuzz.cpp
@@ -16,8 +16,8 @@
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
   static constexpr size_t MAX_SIZE = 1024;
-  static ProtectedPages regions;
-  static const Page write_buffer = regions.GetPageA().WithAccess(PROT_WRITE);
+  static ProtectedPages pages;
+  static const Page write_buffer = pages.GetPageA().WithAccess(PROT_WRITE);
   // We fill 'size' with data coming from lib_fuzzer, this limits exploration to
   // 2 bytes.
   uint16_t size = 0;

>From 8fa3e161e7db4c0195e82e43dea08e557bde8e50 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Tue, 30 Apr 2024 11:05:50 +0000
Subject: [PATCH 3/5] Rephrase comments

---
 libc/fuzzing/string/memcpy_fuzz.cpp |  9 +++++----
 libc/fuzzing/string/memset_fuzz.cpp | 13 +++++++------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/libc/fuzzing/string/memcpy_fuzz.cpp b/libc/fuzzing/string/memcpy_fuzz.cpp
index fea6eb8909bb6..b22f55e07f0d0 100644
--- a/libc/fuzzing/string/memcpy_fuzz.cpp
+++ b/libc/fuzzing/string/memcpy_fuzz.cpp
@@ -36,10 +36,11 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
   __builtin_memcpy(&size, data, sizeof(size));
   if (size >= MAX_SIZE || size >= GetPageSize())
     return 0;
-  // We cross-check the function from two sources and two destinations.
-  // The first of them (bottom) is always page aligned.
-  // The second one (top) is not necessarily aligned.
-  // Both sources and destinations are checked for out of bound accesses.
+  // We cross-check the function with two sources and two destinations.
+  //  - The first of them (bottom) is always page aligned and faults when
+  //    accessing bytes before it.
+  //  - The second one (top) is not necessarily aligned and faults when
+  //    accessing bytes after it.
   const uint8_t *sources[2] = {read_buffer.bottom(size), read_buffer.top(size)};
   uint8_t *destinations[2] = {write_buffer.bottom(size),
                               write_buffer.top(size)};
diff --git a/libc/fuzzing/string/memset_fuzz.cpp b/libc/fuzzing/string/memset_fuzz.cpp
index af55841d3d449..7bcadf8fd3bc8 100644
--- a/libc/fuzzing/string/memset_fuzz.cpp
+++ b/libc/fuzzing/string/memset_fuzz.cpp
@@ -18,8 +18,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
   static constexpr size_t MAX_SIZE = 1024;
   static ProtectedPages pages;
   static const Page write_buffer = pages.GetPageA().WithAccess(PROT_WRITE);
-  // We fill 'size' with data coming from lib_fuzzer, this limits exploration to
-  // 2 bytes.
+  // We fill 'size' and 'fill_char' with data coming from lib_fuzzer, this
+  // limits exploration to 3 bytes.
   uint16_t size = 0;
   uint8_t fill_char = 0;
   if (data_size != sizeof(size) + sizeof(fill_char))
@@ -28,10 +28,11 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t data_size) {
   __builtin_memcpy(&fill_char, data + sizeof(size), sizeof(fill_char));
   if (size >= MAX_SIZE || size >= GetPageSize())
     return 0;
-  // We cross-check the function from two sources and two destinations.
-  // The first of them (bottom) is always page aligned.
-  // The second one (top) is not necessarily aligned.
-  // Both sources and destinations are checked for out of bound accesses.
+  // We cross-check the function with two destinations.
+  // - The first of them (bottom) is always page aligned and faults when
+  //   accessing bytes before it.
+  // - The second one (top) is not necessarily aligned and faults when accessing
+  //   bytes after it.
   uint8_t *destinations[2] = {write_buffer.bottom(size),
                               write_buffer.top(size)};
   for (uint8_t *dst : destinations) {

>From 6c9e5e1099182ef3b58f9d98606ee992a30c37fb Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Tue, 30 Apr 2024 11:10:49 +0000
Subject: [PATCH 4/5] Update protected_pages header and documentation

---
 libc/fuzzing/string/protected_pages.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/libc/fuzzing/string/protected_pages.h b/libc/fuzzing/string/protected_pages.h
index fa09f9ac7947f..67b88074b4f92 100644
--- a/libc/fuzzing/string/protected_pages.h
+++ b/libc/fuzzing/string/protected_pages.h
@@ -1,3 +1,15 @@
+//===-- protected_pages.h -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file provides protected pages that fault on accesses before or past
+// them. This is useful to check memory functions that must not access outside
+// of the provided size-limited buffer.
+//===----------------------------------------------------------------------===//
+
 #ifndef LIBC_FUZZING_STRING_PROTECTED_PAGES_H
 #define LIBC_FUZZING_STRING_PROTECTED_PAGES_H
 

>From e0deb5624f00a37915b7b03670c9b44c9dcb767d Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Tue, 14 May 2024 09:47:49 +0000
Subject: [PATCH 5/5] Improve documentation

---
 libc/fuzzing/string/protected_pages.h | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/libc/fuzzing/string/protected_pages.h b/libc/fuzzing/string/protected_pages.h
index 67b88074b4f92..3bb84fe4940d2 100644
--- a/libc/fuzzing/string/protected_pages.h
+++ b/libc/fuzzing/string/protected_pages.h
@@ -21,7 +21,7 @@
 // Returns mmap page size.
 size_t GetPageSize() { return sysconf(_SC_PAGESIZE); }
 
-// Represents a page of memory which access can be configured throught the
+// Represents a page of memory whose access can be configured through the
 // 'WithAccess' function. Accessing data above or below this page will trap as
 // it is sandwiched between two pages with no read / write access.
 struct Page {
@@ -33,9 +33,10 @@ struct Page {
     return page_ptr;
   }
   // Returns a pointer to a buffer that can be accessed up to size. Accessing
-  // data at ptr[size] will fault.
+  // data at ptr[size] will trap.
   uint8_t *top(size_t size) const { return page_ptr + page_size - size; }
 
+  // protection is one of PROT_READ / PROT_WRITE.
   Page &WithAccess(int protection) {
     if (mprotect(page_ptr, page_size, protection) != 0)
       __builtin_trap();
@@ -47,14 +48,15 @@ struct Page {
 };
 
 // Allocates 5 consecutive pages that will trap if accessed.
-// +-----------------+
-// | page 0 (FAULT)  |
-// | page 1 (CUSTOM) |
-// | page 2 (FAULT)  |
-// | page 3 (CUSTOM) |
-// | page 4 (FAULT)  |
-// +-----------------+
-// The pages 1 and 3 can be retrieved as with 'GetPageA' / 'GetPageB' and their
+// | page layout | access | page name |
+// |-------------|--------|:---------:|
+// | 1           | trap   |           |
+// | 2           | custom |     A     |
+// | 3           | trap   |           |
+// | 4           | custom |     B     |
+// | 5           | trap   |           |
+//
+// The pages A and B can be retrieved with 'GetPageA' / 'GetPageB' and their
 // accesses can be customized through the 'WithAccess' function.
 struct ProtectedPages {
   static constexpr size_t PAGES = 5;



More information about the libc-commits mailing list