[libc-commits] [libc] b07feef - [libc] This commit fixes the strcmp fuzzing test. It uses a single input and

Sun Jun 28 10:51:00 PDT 2020

Author: cgyurgyik
Date: 2020-06-28T12:50:04-05:00
New Revision: b07feef8736d52e11c41631d3813a57a3b3ce6e5

URL: https://github.com/llvm/llvm-project/commit/b07feef8736d52e11c41631d3813a57a3b3ce6e5
DIFF: https://github.com/llvm/llvm-project/commit/b07feef8736d52e11c41631d3813a57a3b3ce6e5.diff

LOG: [libc] This commit fixes the strcmp fuzzing test. It uses a single input and
splits it into two by using the value of the first byte to determine the
length of the first string. Reviewed-by: PaulkaToast, Differential
Revision: https://reviews.llvm.org/D82427

Summary:
[libc] Since only one input is given, it is necessary to split the string into two containers so that they can be compared for the purposes of this fuzz test. This is done in the following manner:

1. Take the value of the first byte; this is size1. (Credits to @PaulkaToast for this idea).
2. size2 is the value of size - size1.
3. Copy the characters to new containers, data1 and data2 with corresponding sizes.
4. Add a null terminator to the first container, and verify the second container has a null terminator.
5. Verify output of strcmp.

A simpler alternative considered was splitting the input data in half, but then the lengths of the two strings would always be within ±1 character of each other. The implementation above avoids this.

ninja check-libc was run; no issues.

Reviewers: PaulkaToast, sivachandra

Reviewed By: PaulkaToast

Subscribers: mgorny, tschuett, ecnelises, libc-commits, PaulkaToast

Tags: #libc-project

Differential Revision: https://reviews.llvm.org/D82427

Added: 
    

Modified: 
    libc/fuzzing/string/CMakeLists.txt
    libc/fuzzing/string/strcmp_fuzz.cpp

Removed: 
    


################################################################################
diff --git a/libc/fuzzing/string/CMakeLists.txt b/libc/fuzzing/string/CMakeLists.txt
index 326bf9c07175..db3def324fcc 100644

--- a/libc/fuzzing/string/CMakeLists.txt
+++ b/libc/fuzzing/string/CMakeLists.txt
@@ -7,3 +7,11 @@ add_libc_fuzzer(
     libc.src.string.strcpy
     libc.src.string.strlen
 )
+
+add_libc_fuzzer(
+  strcmp_fuzz
+  SRCS
+    strcmp_fuzz.cpp
+  DEPENDS
+    libc.src.string.strcmp
+)

diff --git a/libc/fuzzing/string/strcmp_fuzz.cpp b/libc/fuzzing/string/strcmp_fuzz.cpp
index 01d5d53330a7..c1b497dfb206 100644
--- a/libc/fuzzing/string/strcmp_fuzz.cpp
+++ b/libc/fuzzing/string/strcmp_fuzz.cpp
@@ -13,45 +13,66 @@
 #include <stddef.h>
 #include <stdint.h>
 
-extern "C" int LLVMFuzzerTestTwoInputs(const uint8_t *data1, size_t size1,
-                                       const uint8_t *data2, size_t size2) {
-  // Verify each data source contains at least one character.
-  if (!size1 || !size2)
-    return 0;
-  // Verify that the final character is the null terminator.
-  if (data1[size1 - 1] != '\0' || data2[size2 - 1] != '\0')
+// The general structure is to take the value of the first byte, set size1 to
+// that value, and add the null terminator. size2 will then contain the rest of
+// the bytes in data.
+// For example, with inputs (data={2, 6, 4, 8, 0}, size=5):
+//         size1: data[0] = 2
+//         data1: {2, 6} + '\0' = {2, 6, '\0'}
+//         size2: size - size1 = 3
+//         data2: {4, 8, '\0'}
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  // Verify the size is at least 1 and the data is null terminated.
+  if (!size || data[size - 1] != '\0')
     return 0;
 
-  const char *s1 = reinterpret_cast<const char *>(data1);
-  const char *s2 = reinterpret_cast<const char *>(data2);
+  const size_t size1 = (data[0] <= size ? data[0] : size);
+  const size_t size2 = size - size1;
 
-  const size_t minimum_size = size1 < size2 ? size1 : size2;
+  // The first size will always be at least 1 since
+  // we need to append the null terminator. The second size
+  // needs to be checked since it must also contain the null
+  // terminator.
+  if (!size2)
+    return 0;
+
+  // Copy the data into new containers.
+  // Add one to data1 for null terminator.
+  uint8_t *data1 = new uint8_t[size1 + 1];
+  uint8_t *data2 = new uint8_t[size2];
+  if (!data1 || !data2)
+    __builtin_trap();
 
-  // Iterate through until either the minimum size is hit,
-  // a character is the null terminator, or the first set
-  // of differed bytes between s1 and s2 are found.
-  // No bytes following a null byte should be compared.
   size_t i;
-  for (i = 0; i < minimum_size; ++i) {
-    if (!s1[i] || s1[i] != s2[i])
-      break;
-  }
+  for (i = 0; i < size1; ++i)
+    data1[i] = data[i];
+  data1[size1] = '\0'; // Add null terminator to data1.
+
+  for (size_t j = 0; j < size2; ++j)
+    data2[j] = data[i++];
 
-  int expected_result = s1[i] - s2[i];
-  int actual_result = __llvm_libc::strcmp(s1, s2);
+  const char *s1 = reinterpret_cast<const char *>(data1);
+  const char *s2 = reinterpret_cast<const char *>(data2);
+  size_t k = 0;
+  // Iterate until a null terminator is hit or the character comparison is
+  // different.
+  while (s1[k] && s2[k] && s1[k] == s2[k])
+    ++k;
 
+  const unsigned char ch1 = static_cast<unsigned char>(s1[k]);
+  const unsigned char ch2 = static_cast<unsigned char>(s2[k]);
   // The expected result should be the difference between the first non-equal
   // characters of s1 and s2. If all characters are equal, the expected result
   // should be '\0' - '\0' = 0.
-  if (expected_result != actual_result)
+  if (__llvm_libc::strcmp(s1, s2) != ch1 - ch2)
     __builtin_trap();
 
   // Verify reversed operands. This should be the negated value of the previous
   // result, except of course if the previous result was zero.
-  expected_result = s2[i] - s1[i];
-  actual_result = __llvm_libc::strcmp(s2, s1);
-  if (expected_result != actual_result)
+  if (__llvm_libc::strcmp(s2, s1) != ch2 - ch1)
     __builtin_trap();
 
+  delete[] data1;
+  delete[] data2;
   return 0;
 }