[libc-commits] [libc] [libc] add hashtable fuzzing (PR #87949)
Schrodinger ZHU Yifan via libc-commits
libc-commits at lists.llvm.org
Mon Apr 8 09:51:17 PDT 2024
https://github.com/SchrodingerZhu updated https://github.com/llvm/llvm-project/pull/87949
From 697b8811f50d0b7a76fd202a113d28b0a5b66048 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Thu, 4 Apr 2024 15:58:57 -0400
Subject: [PATCH 1/7] [libc] avoid c++ includes from MPFR
---
libc/utils/MPFRWrapper/mpfr_inc.h | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/libc/utils/MPFRWrapper/mpfr_inc.h b/libc/utils/MPFRWrapper/mpfr_inc.h
index 58fa7b25a9f210..0d8afbed39859f 100644
--- a/libc/utils/MPFRWrapper/mpfr_inc.h
+++ b/libc/utils/MPFRWrapper/mpfr_inc.h
@@ -17,7 +17,21 @@
// MPFR header can be included in manner allowed in that repo.
#include "CustomMPFRIncluder.h"
#else
+
+extern "C" {
+#pragma push_macro("__cplusplus")
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wbuiltin-macro-redefined"
+#endif
+#undef __cplusplus
#include <mpfr.h>
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+#pragma pop_macro("__cplusplus")
+}
+
#endif
#endif // LLVM_LIBC_UTILS_MPFRWRAPPER_MPFR_INC_H
From 133bb48d22caef6bc0da11a3b028108000d56b91 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Sun, 7 Apr 2024 17:11:10 -0400
Subject: [PATCH 2/7] [libc] remove MPFR and related tests in full build
---
libc/cmake/modules/LLVMLibCCheckMPFR.cmake | 2 +-
libc/utils/MPFRWrapper/CMakeLists.txt | 8 +-------
libc/utils/MPFRWrapper/mpfr_inc.h | 14 --------------
3 files changed, 2 insertions(+), 22 deletions(-)
diff --git a/libc/cmake/modules/LLVMLibCCheckMPFR.cmake b/libc/cmake/modules/LLVMLibCCheckMPFR.cmake
index bbaeb9f0dc053f..532b0b9bfae392 100644
--- a/libc/cmake/modules/LLVMLibCCheckMPFR.cmake
+++ b/libc/cmake/modules/LLVMLibCCheckMPFR.cmake
@@ -2,7 +2,7 @@ set(LLVM_LIBC_MPFR_INSTALL_PATH "" CACHE PATH "Path to where MPFR is installed (
if(LLVM_LIBC_MPFR_INSTALL_PATH)
set(LIBC_TESTS_CAN_USE_MPFR TRUE)
-elseif(LIBC_TARGET_OS_IS_GPU)
+elseif(LIBC_TARGET_OS_IS_GPU OR LLVM_LIBC_FULL_BUILD)
set(LIBC_TESTS_CAN_USE_MPFR FALSE)
else()
try_compile(
diff --git a/libc/utils/MPFRWrapper/CMakeLists.txt b/libc/utils/MPFRWrapper/CMakeLists.txt
index 2f2b0ac09df9a8..6af6fd77070418 100644
--- a/libc/utils/MPFRWrapper/CMakeLists.txt
+++ b/libc/utils/MPFRWrapper/CMakeLists.txt
@@ -5,12 +5,6 @@ if(LIBC_TESTS_CAN_USE_MPFR)
mpfr_inc.h
)
target_compile_options(libcMPFRWrapper PRIVATE -O3)
- if (LLVM_LIBC_FULL_BUILD)
- # It is not easy to make libcMPFRWrapper a standalone library because gmp.h may unconditionally
- # pull in some STL headers. As a result, targets using this library will need to link against
- # C++ and unwind libraries. Since we are using MPFR anyway, we directly specifies the GNU toolchain.
- target_link_libraries(libcMPFRWrapper PUBLIC -lstdc++ -lgcc_s)
- endif()
add_dependencies(
libcMPFRWrapper
libc.src.__support.CPP.string_view
@@ -24,6 +18,6 @@ if(LIBC_TESTS_CAN_USE_MPFR)
target_link_directories(libcMPFRWrapper PUBLIC ${LLVM_LIBC_MPFR_INSTALL_PATH}/lib)
endif()
target_link_libraries(libcMPFRWrapper PUBLIC LibcFPTestHelpers.unit LibcTest.unit mpfr gmp)
-elseif(NOT LIBC_TARGET_OS_IS_GPU)
+elseif(NOT LIBC_TARGET_OS_IS_GPU AND NOT LLVM_LIBC_FULL_BUILD)
message(WARNING "Math tests using MPFR will be skipped.")
endif()
diff --git a/libc/utils/MPFRWrapper/mpfr_inc.h b/libc/utils/MPFRWrapper/mpfr_inc.h
index 0d8afbed39859f..58fa7b25a9f210 100644
--- a/libc/utils/MPFRWrapper/mpfr_inc.h
+++ b/libc/utils/MPFRWrapper/mpfr_inc.h
@@ -17,21 +17,7 @@
// MPFR header can be included in manner allowed in that repo.
#include "CustomMPFRIncluder.h"
#else
-
-extern "C" {
-#pragma push_macro("__cplusplus")
-#if defined(__clang__)
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wbuiltin-macro-redefined"
-#endif
-#undef __cplusplus
#include <mpfr.h>
-#if defined(__clang__)
-#pragma clang diagnostic pop
-#endif
-#pragma pop_macro("__cplusplus")
-}
-
#endif
#endif // LLVM_LIBC_UTILS_MPFRWRAPPER_MPFR_INC_H
From 58ec1616c24303cb9d34bdf4704bcd4d51e74adb Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Mon, 8 Apr 2024 12:41:26 -0400
Subject: [PATCH 3/7] comment
---
libc/cmake/modules/LLVMLibCCheckMPFR.cmake | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libc/cmake/modules/LLVMLibCCheckMPFR.cmake b/libc/cmake/modules/LLVMLibCCheckMPFR.cmake
index 532b0b9bfae392..45334a54431ef4 100644
--- a/libc/cmake/modules/LLVMLibCCheckMPFR.cmake
+++ b/libc/cmake/modules/LLVMLibCCheckMPFR.cmake
@@ -3,6 +3,8 @@ set(LLVM_LIBC_MPFR_INSTALL_PATH "" CACHE PATH "Path to where MPFR is installed (
if(LLVM_LIBC_MPFR_INSTALL_PATH)
set(LIBC_TESTS_CAN_USE_MPFR TRUE)
elseif(LIBC_TARGET_OS_IS_GPU OR LLVM_LIBC_FULL_BUILD)
+ # In full build mode, the MPFR library should be built using our own facilities,
+ # which is currently not possible.
set(LIBC_TESTS_CAN_USE_MPFR FALSE)
else()
try_compile(
From 01e7ded89073f6227b9c1c63e6ce2e19f675cef3 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Sun, 7 Apr 2024 18:26:16 -0400
Subject: [PATCH 4/7] [libc] add hashtable fuzzing
---
libc/fuzzing/__support/CMakeLists.txt | 9 ++
libc/fuzzing/__support/hashtable_fuzz.cpp | 157 ++++++++++++++++++++++
2 files changed, 166 insertions(+)
create mode 100644 libc/fuzzing/__support/hashtable_fuzz.cpp
diff --git a/libc/fuzzing/__support/CMakeLists.txt b/libc/fuzzing/__support/CMakeLists.txt
index d4f6db71fdd849..b5d2b488447fc5 100644
--- a/libc/fuzzing/__support/CMakeLists.txt
+++ b/libc/fuzzing/__support/CMakeLists.txt
@@ -5,3 +5,12 @@ add_libc_fuzzer(
DEPENDS
libc.src.__support.big_int
)
+
+add_libc_fuzzer(
+ hashtable_fuzz
+ SRCS
+ hashtable_fuzz.cpp
+ DEPENDS
+ libc.src.__support.HashTable.table
+ libc.src.string.memcpy
+)
diff --git a/libc/fuzzing/__support/hashtable_fuzz.cpp b/libc/fuzzing/__support/hashtable_fuzz.cpp
new file mode 100644
index 00000000000000..4b862b03b9d309
--- /dev/null
+++ b/libc/fuzzing/__support/hashtable_fuzz.cpp
@@ -0,0 +1,157 @@
+#include "src/__support/CPP/new.h"
+#include "src/__support/CPP/optional.h"
+#include "src/__support/HashTable/table.h"
+#include "src/string/memcpy.h"
+#include <search.h>
+#include <stdint.h>
+namespace LIBC_NAMESPACE {
+
+enum class Action { Find, Insert, CrossCheck };
+static uint8_t *global_buffer = nullptr;
+static size_t remaining = 0;
+
+static cpp::optional<uint8_t> next_u8() {
+ if (remaining == 0)
+ return cpp::nullopt;
+ uint8_t result = *global_buffer;
+ global_buffer++;
+ remaining--;
+ return result;
+}
+
+static cpp::optional<uint64_t> next_uint64() {
+ uint64_t result;
+ if (remaining < sizeof(result))
+ return cpp::nullopt;
+ memcpy(&result, global_buffer, sizeof(result));
+ global_buffer += sizeof(result);
+ remaining -= sizeof(result);
+ return result;
+}
+
+static cpp::optional<Action> next_action() {
+ if (cpp::optional<uint8_t> action = next_u8()) {
+ switch (*action % 3) {
+ case 0:
+ return Action::Find;
+ case 1:
+ return Action::Insert;
+ case 2:
+ return Action::CrossCheck;
+ }
+ }
+ return cpp::nullopt;
+}
+
+static cpp::optional<char *> next_cstr() {
+ char *result = reinterpret_cast<char *>(global_buffer);
+ if (cpp::optional<uint64_t> len = next_uint64()) {
+ uint64_t length;
+ for (length = 0; length < *len % 128; length++) {
+ if (length >= remaining)
+ return cpp::nullopt;
+ if (*global_buffer == '\0')
+ break;
+ }
+ if (length >= remaining)
+ return cpp::nullopt;
+ global_buffer[length] = '\0';
+ global_buffer += length + 1;
+ remaining -= length + 1;
+ return result;
+ }
+ return cpp::nullopt;
+}
+
+#define GET_VAL(op) \
+ __extension__({ \
+ auto val = op(); \
+ if (!val) \
+ return 0; \
+ *val; \
+ })
+
+template <typename Fn> struct CleanUpHook {
+ cpp::optional<Fn> fn;
+ ~CleanUpHook() {
+ if (fn)
+ (*fn)();
+ }
+ CleanUpHook(Fn fn) : fn(cpp::move(fn)) {}
+ CleanUpHook(const CleanUpHook &) = delete;
+ CleanUpHook(CleanUpHook &&other) : fn(cpp::move(other.fn)) {
+ other.fn = cpp::nullopt;
+ }
+};
+
+#define register_cleanup(ID, ...) \
+ auto cleanup_hook##ID = __extension__({ \
+ auto a = __VA_ARGS__; \
+ CleanUpHook<decltype(a)>{a}; \
+ });
+
+static void trap_with_message(const char *msg) { __builtin_trap(); }
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ AllocChecker ac;
+ global_buffer = static_cast<uint8_t *>(::operator new(size, ac));
+ register_cleanup(0, [global_buffer = global_buffer, size] {
+ ::operator delete(global_buffer, size);
+ });
+ if (!ac)
+ return 0;
+ memcpy(global_buffer, data, size);
+
+ remaining = size;
+ uint64_t size_a = GET_VAL(next_uint64) % 256;
+ uint64_t size_b = GET_VAL(next_uint64) % 256;
+ uint64_t rand_a = GET_VAL(next_uint64);
+ uint64_t rand_b = GET_VAL(next_uint64);
+ internal::HashTable *table_a = internal::HashTable::allocate(size_a, rand_a);
+ register_cleanup(1, [&table_a] { internal::HashTable::deallocate(table_a); });
+ internal::HashTable *table_b = internal::HashTable::allocate(size_b, rand_b);
+ register_cleanup(2, [&table_b] { internal::HashTable::deallocate(table_b); });
+ if (!table_a || !table_b)
+ return 0;
+ for (;;) {
+ Action action = GET_VAL(next_action);
+ switch (action) {
+ case Action::Find: {
+ const char *key = GET_VAL(next_cstr);
+ if (!key)
+ return 0;
+ if (static_cast<bool>(table_a->find(key)) !=
+ static_cast<bool>(table_b->find(key)))
+ trap_with_message(key);
+ break;
+ }
+ case Action::Insert: {
+ char *key = GET_VAL(next_cstr);
+ if (!key)
+ return 0;
+ ENTRY *a = internal::HashTable::insert(table_a, ENTRY{key, key});
+ ENTRY *b = internal::HashTable::insert(table_b, ENTRY{key, key});
+ if (a->data != b->data)
+ __builtin_trap();
+ break;
+ }
+ case Action::CrossCheck: {
+ for (ENTRY a : *table_a) {
+ if (const ENTRY *b = table_b->find(a.key)) {
+ if (a.data != b->data)
+ __builtin_trap();
+ }
+ }
+ for (ENTRY b : *table_b) {
+ if (const ENTRY *a = table_a->find(b.key)) {
+ if (a->data != b.data)
+ __builtin_trap();
+ }
+ }
+ break;
+ }
+ }
+ }
+}
+
+} // namespace LIBC_NAMESPACE
From 15880a9bc73505e9a45fba0ae2e6b9fca66224a9 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Sun, 7 Apr 2024 18:35:17 -0400
Subject: [PATCH 5/7] remove extra code
---
libc/fuzzing/__support/hashtable_fuzz.cpp | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/libc/fuzzing/__support/hashtable_fuzz.cpp b/libc/fuzzing/__support/hashtable_fuzz.cpp
index 4b862b03b9d309..758c8d1aae01bf 100644
--- a/libc/fuzzing/__support/hashtable_fuzz.cpp
+++ b/libc/fuzzing/__support/hashtable_fuzz.cpp
@@ -108,9 +108,15 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
uint64_t rand_a = GET_VAL(next_uint64);
uint64_t rand_b = GET_VAL(next_uint64);
internal::HashTable *table_a = internal::HashTable::allocate(size_a, rand_a);
- register_cleanup(1, [&table_a] { internal::HashTable::deallocate(table_a); });
+ register_cleanup(1, [&table_a] {
+ if (table_a)
+ internal::HashTable::deallocate(table_a);
+ });
internal::HashTable *table_b = internal::HashTable::allocate(size_b, rand_b);
- register_cleanup(2, [&table_b] { internal::HashTable::deallocate(table_b); });
+ register_cleanup(2, [&table_b] {
+ if (table_b)
+ internal::HashTable::deallocate(table_b);
+ });
if (!table_a || !table_b)
return 0;
for (;;) {
@@ -118,8 +124,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
switch (action) {
case Action::Find: {
const char *key = GET_VAL(next_cstr);
- if (!key)
- return 0;
if (static_cast<bool>(table_a->find(key)) !=
static_cast<bool>(table_b->find(key)))
trap_with_message(key);
@@ -127,8 +131,6 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
}
case Action::Insert: {
char *key = GET_VAL(next_cstr);
- if (!key)
- return 0;
ENTRY *a = internal::HashTable::insert(table_a, ENTRY{key, key});
ENTRY *b = internal::HashTable::insert(table_b, ENTRY{key, key});
if (a->data != b->data)
From 27ddfe0871ec0d1656a0df3ea8e1a916e8705a58 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Mon, 8 Apr 2024 09:18:48 -0400
Subject: [PATCH 6/7] make style consistent
---
libc/fuzzing/__support/hashtable_fuzz.cpp | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/libc/fuzzing/__support/hashtable_fuzz.cpp b/libc/fuzzing/__support/hashtable_fuzz.cpp
index 758c8d1aae01bf..d5c64970b53234 100644
--- a/libc/fuzzing/__support/hashtable_fuzz.cpp
+++ b/libc/fuzzing/__support/hashtable_fuzz.cpp
@@ -63,7 +63,7 @@ static cpp::optional<char *> next_cstr() {
return cpp::nullopt;
}
-#define GET_VAL(op) \
+#define get_value(op) \
__extension__({ \
auto val = op(); \
if (!val) \
@@ -103,10 +103,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
memcpy(global_buffer, data, size);
remaining = size;
- uint64_t size_a = GET_VAL(next_uint64) % 256;
- uint64_t size_b = GET_VAL(next_uint64) % 256;
- uint64_t rand_a = GET_VAL(next_uint64);
- uint64_t rand_b = GET_VAL(next_uint64);
+ uint64_t size_a = get_value(next_uint64) % 256;
+ uint64_t size_b = get_value(next_uint64) % 256;
+ uint64_t rand_a = get_value(next_uint64);
+ uint64_t rand_b = get_value(next_uint64);
internal::HashTable *table_a = internal::HashTable::allocate(size_a, rand_a);
register_cleanup(1, [&table_a] {
if (table_a)
@@ -120,17 +120,17 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
if (!table_a || !table_b)
return 0;
for (;;) {
- Action action = GET_VAL(next_action);
+ Action action = get_value(next_action);
switch (action) {
case Action::Find: {
- const char *key = GET_VAL(next_cstr);
+ const char *key = get_value(next_cstr);
if (static_cast<bool>(table_a->find(key)) !=
static_cast<bool>(table_b->find(key)))
trap_with_message(key);
break;
}
case Action::Insert: {
- char *key = GET_VAL(next_cstr);
+ char *key = get_value(next_cstr);
ENTRY *a = internal::HashTable::insert(table_a, ENTRY{key, key});
ENTRY *b = internal::HashTable::insert(table_b, ENTRY{key, key});
if (a->data != b->data)
From aa4ac54bed3c68349b6ecfc10ed586d71d294917 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Mon, 8 Apr 2024 12:50:59 -0400
Subject: [PATCH 7/7] make information more concentrated
---
libc/fuzzing/__support/hashtable_fuzz.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/libc/fuzzing/__support/hashtable_fuzz.cpp b/libc/fuzzing/__support/hashtable_fuzz.cpp
index d5c64970b53234..f346d2b79c6db4 100644
--- a/libc/fuzzing/__support/hashtable_fuzz.cpp
+++ b/libc/fuzzing/__support/hashtable_fuzz.cpp
@@ -45,9 +45,9 @@ static cpp::optional<Action> next_action() {
static cpp::optional<char *> next_cstr() {
char *result = reinterpret_cast<char *>(global_buffer);
- if (cpp::optional<uint64_t> len = next_uint64()) {
+ if (cpp::optional<uint8_t> len = next_u8()) {
uint64_t length;
- for (length = 0; length < *len % 128; length++) {
+ for (length = 0; length < *len; length++) {
if (length >= remaining)
return cpp::nullopt;
if (*global_buffer == '\0')
@@ -87,7 +87,7 @@ template <typename Fn> struct CleanUpHook {
#define register_cleanup(ID, ...) \
auto cleanup_hook##ID = __extension__({ \
auto a = __VA_ARGS__; \
- CleanUpHook<decltype(a)>{a}; \
+ CleanUpHook<decltype(a)>(cpp::move(a)); \
});
static void trap_with_message(const char *msg) { __builtin_trap(); }
@@ -103,8 +103,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
memcpy(global_buffer, data, size);
remaining = size;
- uint64_t size_a = get_value(next_uint64) % 256;
- uint64_t size_b = get_value(next_uint64) % 256;
+ uint64_t size_a = get_value(next_u8);
+ uint64_t size_b = get_value(next_u8);
uint64_t rand_a = get_value(next_uint64);
uint64_t rand_b = get_value(next_uint64);
internal::HashTable *table_a = internal::HashTable::allocate(size_a, rand_a);
More information about the libc-commits mailing list