[libc-commits] [libc] f1990fe - [libc] add fuzz target for strtointeger functions

Michael Jones via libc-commits libc-commits at lists.llvm.org
Tue Dec 20 10:48:39 PST 2022


Author: Michael Jones
Date: 2022-12-20T10:48:34-08:00
New Revision: f1990feb35e835ab81d6351dd4b6ef3dccc4aca5

URL: https://github.com/llvm/llvm-project/commit/f1990feb35e835ab81d6351dd4b6ef3dccc4aca5
DIFF: https://github.com/llvm/llvm-project/commit/f1990feb35e835ab81d6351dd4b6ef3dccc4aca5.diff

LOG: [libc] add fuzz target for strtointeger functions

The string to integer conversion functions are well suited to
differential fuzzing, and this patch adds a target to enable just that.
It also fixes a bug in the fuzzing comparison logic and changes atoi
slightly to match the behavior described in the C standard.

Reviewed By: sivachandra, lntue

Differential Revision: https://reviews.llvm.org/D140178

Added: 
    libc/fuzzing/stdlib/strtointeger_differential_fuzz.cpp

Modified: 
    libc/cmake/modules/LLVMLibCTestRules.cmake
    libc/fuzzing/math/Compare.h
    libc/fuzzing/stdlib/CMakeLists.txt
    libc/fuzzing/stdlib/StringParserOutputDiff.h

Removed: 
    


################################################################################
diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake
index 00974c86a08d1..10e3823cb5c9a 100644
--- a/libc/cmake/modules/LLVMLibCTestRules.cmake
+++ b/libc/cmake/modules/LLVMLibCTestRules.cmake
@@ -319,7 +319,7 @@ function(add_libc_fuzzer target_name)
     "LIBC_FUZZER"
     "" # No optional arguments
     "" # Single value arguments
-    "SRCS;HDRS;DEPENDS" # Multi-value arguments
+    "SRCS;HDRS;DEPENDS;COMPILE_OPTIONS" # Multi-value arguments
     ${ARGN}
   )
   if(NOT LIBC_FUZZER_SRCS)
@@ -374,6 +374,11 @@ function(add_libc_fuzzer target_name)
     ${fq_deps_list}
   )
   add_dependencies(libc-fuzzer ${fq_target_name})
+
+  target_compile_options(${fq_target_name}
+    PRIVATE
+    ${LIBC_FUZZER_COMPILE_OPTIONS})
+
 endfunction(add_libc_fuzzer)
 
 # Rule to add an integration test. An integration test is like a unit test

diff --git a/libc/fuzzing/math/Compare.h b/libc/fuzzing/math/Compare.h
index 7a194383f4df8..e690e5168d27b 100644
--- a/libc/fuzzing/math/Compare.h
+++ b/libc/fuzzing/math/Compare.h
@@ -28,7 +28,7 @@ ValuesEqual(T x1, T x2) {
 template <typename T>
 __llvm_libc::cpp::enable_if_t<__llvm_libc::cpp::is_integral_v<T>, bool>
 ValuesEqual(T x1, T x2) {
-  return x1 == x1;
+  return x1 == x2;
 }
 
 #endif // LLVM_LIBC_FUZZING_MATH_COMPARE_H

diff --git a/libc/fuzzing/stdlib/CMakeLists.txt b/libc/fuzzing/stdlib/CMakeLists.txt
index 0016f4f7ccbfd..d28a0d52ad178 100644
--- a/libc/fuzzing/stdlib/CMakeLists.txt
+++ b/libc/fuzzing/stdlib/CMakeLists.txt
@@ -16,3 +16,36 @@ add_libc_fuzzer(
     libc.src.stdlib.atof
 )
 
+add_libc_fuzzer(
+  strtointeger_differential_fuzz
+  SRCS
+    strtointeger_differential_fuzz.cpp
+  HDRS
+    StringParserOutputDiff.h
+  DEPENDS
+    libc.src.stdlib.atoi
+    libc.src.stdlib.atol
+    libc.src.stdlib.atoll
+    libc.src.stdlib.strtol
+    libc.src.stdlib.strtoll
+    libc.src.stdlib.strtoul
+    libc.src.stdlib.strtoull
+)
+
+add_libc_fuzzer(
+  strtointeger_differential_fuzz_cleaner
+  SRCS
+    strtointeger_differential_fuzz.cpp
+  HDRS
+    StringParserOutputDiff.h
+  DEPENDS
+    libc.src.stdlib.atoi
+    libc.src.stdlib.atol
+    libc.src.stdlib.atoll
+    libc.src.stdlib.strtol
+    libc.src.stdlib.strtoll
+    libc.src.stdlib.strtoul
+    libc.src.stdlib.strtoull
+  COMPILE_OPTIONS
+    -DLLVM_LIBC_FUZZ_ATOI_CLEANER_INPUT
+)

diff --git a/libc/fuzzing/stdlib/StringParserOutputDiff.h b/libc/fuzzing/stdlib/StringParserOutputDiff.h
index 457da3b5674ba..9e5accfdfc986 100644
--- a/libc/fuzzing/stdlib/StringParserOutputDiff.h
+++ b/libc/fuzzing/stdlib/StringParserOutputDiff.h
@@ -32,4 +32,28 @@ void StringParserOutputDiff(StringInputSingleOutputFunc<T> func1,
     __builtin_trap();
 }
 
+// Signature shared by the strtol family: (string, end pointer out, base).
+template <typename T>
+using StringToNumberFunc = T (*)(const char *, char **, int);
+// Runs func1 and func2 on the string after data[0] (data[0] picks the base)
+// and traps unless both return equal values and stop at the same position.
+template <typename T>
+void StringToNumberOutputDiff(StringToNumberFunc<T> func1,
+                              StringToNumberFunc<T> func2, const uint8_t *data,
+                              size_t size) {
+  if (size < sizeof(T)) // Too short; this also ensures data[0] is readable.
+    return;
+  const char *x = reinterpret_cast<const char *>(data + 1);
+  int base = data[0] % 36;             // 0..35
+  base = base + ((base == 0) ? 0 : 1); // 0 stays 0, 1..35 becomes 2..36.
+  char *outPtr1 = nullptr;
+  char *outPtr2 = nullptr;
+  T result1 = func1(x, &outPtr1, base);
+  T result2 = func2(x, &outPtr2, base);
+  // Both calls parse the same buffer, so compare the end positions themselves
+  // rather than the characters stored there, which can match by coincidence.
+  if (!(ValuesEqual(result1, result2) && (outPtr1 == outPtr2)))
+    __builtin_trap();
+}
+
 #endif // LLVM_LIBC_FUZZING_STDLIB_STRING_PARSER_OUTPUT_DIFF_H

diff --git a/libc/fuzzing/stdlib/strtointeger_differential_fuzz.cpp b/libc/fuzzing/stdlib/strtointeger_differential_fuzz.cpp
new file mode 100644
index 0000000000000..45a45cdd5615a
--- /dev/null
+++ b/libc/fuzzing/stdlib/strtointeger_differential_fuzz.cpp
@@ -0,0 +1,81 @@
+//===-- strtointeger_differential_fuzz.cpp --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Differential fuzzing test for the llvm-libc string to integer functions.
+///
+//===----------------------------------------------------------------------===//
+#include "src/stdlib/atoi.h"
+#include "src/stdlib/atol.h"
+#include "src/stdlib/atoll.h"
+#include "src/stdlib/strtol.h"
+#include "src/stdlib/strtoll.h"
+#include "src/stdlib/strtoul.h"
+#include "src/stdlib/strtoull.h"
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "fuzzing/stdlib/StringParserOutputDiff.h"
+
+// This list contains (almost) all characters that can possibly be accepted by
+// a string to integer conversion. Those are: space, tab, +/- signs, any digit,
+// and any letter. Technically there are some space characters accepted by
+// isspace that aren't in this list, but since space characters are just skipped
+// over anyway, leaving them out should not matter.
+[[maybe_unused]] constexpr char VALID_CHARS[] = {
+    ' ', '\t', '-', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+    'a', 'A',  'b', 'B', 'c', 'C', 'd', 'D', 'e', 'E', 'f', 'F', 'g', 'G',
+    'h', 'H',  'i', 'I', 'j', 'J', 'k', 'K', 'l', 'L', 'm', 'M', 'n', 'N',
+    'o', 'O',  'p', 'P', 'q', 'Q', 'r', 'R', 's', 'S', 't', 'T', 'u', 'U',
+    'v', 'V',  'w', 'W', 'x', 'X', 'y', 'Y', 'z', 'Z'};
+
+// Copies the fuzzer bytes into a null-terminated buffer and differentially
+// tests the llvm-libc string to integer functions against the ones from
+// <stdlib.h>. For the strto* functions the first byte picks the base and the
+// remaining bytes are the string to convert. Empty input is skipped, as it has
+// no base byte to read. If LLVM_LIBC_FUZZ_ATOI_CLEANER_INPUT is defined, every
+// byte except the first is mapped onto VALID_CHARS. Any other character would
+// just end the integer, so for fully random input each character has a greater
+// than 50% chance of ending the string, making long numbers very unlikely.
+// Always returns 0; a mismatch is reported by the helpers (they trap).
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  if (size == 0) // data[0] is read below, so there must be at least one byte.
+    return 0;
+  uint8_t *container = new uint8_t[size + 1];
+  if (!container)
+    __builtin_trap();
+  for (size_t i = 0; i < size; ++i) {
+#ifdef LLVM_LIBC_FUZZ_ATOI_CLEANER_INPUT
+    container[i] = VALID_CHARS[data[i] % sizeof(VALID_CHARS)];
+#else
+    container[i] = data[i];
+#endif
+  }
+  container[size] = '\0'; // Add null terminator to container.
+  // the first character is interpreted as the base, so it should be fully
+  // random even when the input is cleaned.
+  container[0] = data[0];
+
+  StringParserOutputDiff<int>(&__llvm_libc::atoi, &::atoi, container, size);
+  StringParserOutputDiff<long>(&__llvm_libc::atol, &::atol, container, size);
+  StringParserOutputDiff<long long>(&__llvm_libc::atoll, &::atoll, container,
+                                    size);
+
+  StringToNumberOutputDiff<long>(&__llvm_libc::strtol, &::strtol, container,
+                                 size);
+  StringToNumberOutputDiff<long long>(&__llvm_libc::strtoll, &::strtoll,
+                                      container, size);
+
+  StringToNumberOutputDiff<unsigned long>(&__llvm_libc::strtoul, &::strtoul,
+                                          container, size);
+  StringToNumberOutputDiff<unsigned long long>(&__llvm_libc::strtoull,
+                                               &::strtoull, container, size);
+
+  delete[] container;
+  return 0;
+}


        


More information about the libc-commits mailing list