[compiler-rt] r358311 - [libFuzzer] Fix DataFlow.cpp logic when tracing long inputs.

Fri Apr 12 14:00:12 PDT 2019

Author: dor1s
Date: Fri Apr 12 14:00:12 2019
New Revision: 358311

URL: http://llvm.org/viewvc/llvm-project?rev=358311&view=rev
Log:
[libFuzzer] Fix DataFlow.cpp logic when tracing long inputs.

Summary:
1. Do not create DFSan labels for bytes that we do not trace. This is where we ran out of labels in the first place.
2. When dumping the traces to disk, make sure to offset the label identifiers by the index of the first byte in the trace range.
3. For the last label, make sure to write it at the last position of the trace bit string, as that label represents the input size, not any particular byte.

Also fixed the division bug in the Python script that I introduced when migrating the scripts to Python 3 (`//` is required for integer division).

Without these fixes, the scripts waste too much time unsuccessfully trying to
collect and process traces from long inputs. For more context, see
https://github.com/google/oss-fuzz/issues/1632#issuecomment-481761789

Reviewers: kcc

Reviewed By: kcc

Subscribers: delcypher, #sanitizers, llvm-commits

Tags: #llvm, #sanitizers

Differential Revision: https://reviews.llvm.org/D60538

Modified:
    compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp
    compiler-rt/trunk/lib/fuzzer/scripts/collect_data_flow.py
    compiler-rt/trunk/test/fuzzer/dataflow.test

Modified: compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp?rev=358311&r1=358310&r2=358311&view=diff
==============================================================================

--- compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp (original)
+++ compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp Fri Apr 12 14:00:12 2019
@@ -63,6 +63,9 @@ __attribute__((weak)) extern int LLVMFuz
 } // extern "C"
 
 static size_t InputLen;
+static size_t InputLabelBeg;
+static size_t InputLabelEnd;
+static size_t InputSizeLabel;
 static size_t NumFuncs;
 static const uintptr_t *FuncsBeg;
 static __thread size_t CurrentFunc;
@@ -95,8 +98,10 @@ void SetBytesForLabel(dfsan_label L, cha
     return;
   LabelSeen[L] = true;
   assert(L);
-  if (L <= InputLen + 1) {
-    Bytes[L - 1] = '1';
+  if (L < InputSizeLabel) {
+    Bytes[L + InputLabelBeg - 1] = '1';
+  } else if (L == InputSizeLabel) {
+    Bytes[InputLen] = '1';
   } else {
     auto *DLI = dfsan_get_label_info(L);
     SetBytesForLabel(DLI->l1, Bytes);
@@ -124,9 +129,9 @@ int main(int argc, char **argv) {
   if (argc == 1)
     return PrintFunctions();
   assert(argc == 4 || argc == 5);
-  size_t Beg = atoi(argv[1]);
-  size_t End = atoi(argv[2]);
-  assert(Beg < End);
+  InputLabelBeg = atoi(argv[1]);
+  InputLabelEnd = atoi(argv[2]);
+  assert(InputLabelBeg < InputLabelEnd);
 
   const char *Input = argv[3];
   fprintf(stderr, "INFO: reading '%s'\n", Input);
@@ -143,14 +148,16 @@ int main(int argc, char **argv) {
 
   fprintf(stderr, "INFO: running '%s'\n", Input);
   for (size_t I = 1; I <= InputLen; I++) {
-    dfsan_label L = dfsan_create_label("", nullptr);
-    assert(L == I);
     size_t Idx = I - 1;
-    if (Idx >= Beg && Idx < End)
+    if (Idx >= InputLabelBeg && Idx < InputLabelEnd) {
+      dfsan_label L = dfsan_create_label("", nullptr);
+      assert(L == I - InputLabelBeg);
       dfsan_set_label(L, Buf + Idx, 1);
+    }
   }
   dfsan_label SizeL = dfsan_create_label("", nullptr);
-  assert(SizeL == InputLen + 1);
+  InputSizeLabel = SizeL;
+  assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1);
   dfsan_set_label(SizeL, &InputLen, sizeof(InputLen));
 
   LLVMFuzzerTestOneInput(Buf, InputLen);

Modified: compiler-rt/trunk/lib/fuzzer/scripts/collect_data_flow.py
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/scripts/collect_data_flow.py?rev=358311&r1=358310&r2=358311&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/scripts/collect_data_flow.py (original)
+++ compiler-rt/trunk/lib/fuzzer/scripts/collect_data_flow.py Fri Apr 12 14:00:12 2019
@@ -65,8 +65,8 @@ def main(argv):
     tmpfile = os.path.join(tmpdir, str(r[0]) + "-" + str(r[1]))
     ret = subprocess.call([exe, str(r[0]), str(r[1]), inp, tmpfile])
     if ret and r[1] - r[0] >= 2:
-      q.append([r[0], (r[1] + r[0]) / 2])
-      q.append([(r[1] + r[0]) / 2, r[1]])
+      q.append([r[0], (r[1] + r[0]) // 2])
+      q.append([(r[1] + r[0]) // 2, r[1]])
     else:
       outputs.append(tmpfile)
       print("******* Success: ", r)

Modified: compiler-rt/trunk/test/fuzzer/dataflow.test
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/fuzzer/dataflow.test?rev=358311&r1=358310&r2=358311&view=diff
==============================================================================
--- compiler-rt/trunk/test/fuzzer/dataflow.test (original)
+++ compiler-rt/trunk/test/fuzzer/dataflow.test Fri Apr 12 14:00:12 2019
@@ -82,3 +82,14 @@ USE_DATA_FLOW_TRACE: INFO: DataFlowTrace
 USE_DATA_FLOW_TRACE-DAG: a8eefe2fd5d6b32028f355fafa3e739a6bf5edc => |000001|
 USE_DATA_FLOW_TRACE-DGA: d28cb407e8e1a702c72d25473f0553d3ec172262 => |0000011|
 USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: 6 trace files, 3 functions, 2 traces with focus function
+
+# Test that we can run collect_data_flow on a long input (>2**16 bytes)
+RUN: rm -rf %t/OUT
+RUN: printf "%0.sA" {1..150001} > %t/IN/very_long_input
+RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ThreeFunctionsTestDF %t/IN/very_long_input %t/OUT | FileCheck %s --check-prefix=COLLECT_TRACE_FOR_LONG_INPUT
+RUN: rm %t/IN/very_long_input
+COLLECT_TRACE_FOR_LONG_INPUT: ******* Trying:{{[ ]+}}[0, 150001]
+COLLECT_TRACE_FOR_LONG_INPUT: ******* Trying:{{[ ]+}}[75000, 150001]
+COLLECT_TRACE_FOR_LONG_INPUT: ******* Trying:{{[ ]+}}[112500, 150001]
+COLLECT_TRACE_FOR_LONG_INPUT: ******* Success:{{[ ]+}}[{{[0123456789]+}}, 150001]
+COLLECT_TRACE_FOR_LONG_INPUT: ******* Success:{{[ ]+}}[0, {{[0123456789]+}}]