[compiler-rt] r332029 - [libFuzzer] Experimental data flow tracer for fuzz targets.

Kostya Serebryany via llvm-commits llvm-commits at lists.llvm.org
Thu May 10 12:59:01 PDT 2018


Author: kcc
Date: Thu May 10 12:59:01 2018
New Revision: 332029

URL: http://llvm.org/viewvc/llvm-project?rev=332029&view=rev
Log:
[libFuzzer] Experimental data flow tracer for fuzz targets.

Summary:
Experimental data flow tracer for fuzz targets.
It makes it possible to tell which bytes of the input affect which functions of the fuzz target.

We previously attempted to use DFSan directly in the libFuzzer process,
and that didn't work nicely.
Now we will try to collect the data flow information for the seed corpus
in a separate process (using this tracer), and then use it in the regular libFuzzer runs.

Reviewers: morehouse, pcc, Dor1s

Reviewed By: morehouse, Dor1s

Subscribers: delcypher, #sanitizers, llvm-commits

Differential Revision: https://reviews.llvm.org/D46666

Added:
    compiler-rt/trunk/lib/fuzzer/dataflow/
    compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp
    compiler-rt/trunk/test/fuzzer/ThreeFunctionsTest.cpp
    compiler-rt/trunk/test/fuzzer/dataflow.test

Added: compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp?rev=332029&view=auto
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp (added)
+++ compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp Thu May 10 12:59:01 2018
@@ -0,0 +1,203 @@
+/*===- DataFlow.cpp - a standalone DataFlow tracer                  -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// An experimental data-flow tracer for fuzz targets.
+// It is based on DFSan and SanitizerCoverage.
+// https://clang.llvm.org/docs/DataFlowSanitizer.html
+// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow
+//
+// It executes the fuzz target on the given input while monitoring the
+// data flow for every instrumented comparison instruction.
+//
+// The output shows which functions depend on which bytes of the input.
+//
+// Build:
+//   1. Compile this file with -fsanitize=dataflow
+//   2. Build the fuzz target with -g -fsanitize=dataflow
+//       -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp
+//   3. Link those together with -fsanitize=dataflow
+//
+//  -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
+//  instruction, DFSan modifies the calls to pass the data flow labels.
+//  The callbacks update the data flow label for the current function.
+//  See e.g. __dfsw___sanitizer_cov_trace_cmp1 below.
+//
+//  -fsanitize-coverage=trace-pc-guard,pc-table,func instruments function
+//  entries so that the comparison callback knows the current function.
+//
+//
+// Run:
+//   # Collect data flow for INPUT_FILE, write to OUTPUT_FILE (default: stdout)
+//   ./a.out INPUT_FILE [OUTPUT_FILE]
+//
+//   # Print all instrumented functions. llvm-symbolizer must be present in PATH
+//   ./a.out
+//
+// Example output:
+// ===============
+// LEN:    5
+// LABELS: 10
+// L7 1 6
+// L8 2 7
+// L9 3 8
+// L10 4 9
+// F1 10
+// F2 5
+//  ===============
+// "LEN:" indicates the number of bytes in the input.
+// "LABELS:" indicates the number of DFSan labels created while running the input.
+//   * The labels [1,LEN] correspond to the bytes of the input
+//     (label 1 corresponds to byte 0, and so on)
+//   * The label LEN+1 corresponds to the input size.
+//   * The labels [LEN+2,LABELS] correspond to DFSan's union labels.
+// "Li j k": describes the label 'i' as a union of labels 'j' and 'k'.
+// "Ff l": indicates that function 'f' depends on label 'l'.
+//===----------------------------------------------------------------------===*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <execinfo.h>  // backtrace_symbols_fd
+
+#include <sanitizer/dfsan_interface.h>
+
+extern "C" {
+extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size);
+__attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv);
+} // extern "C"
+
+static size_t InputLen;  // Size of the input file in bytes; set in main().
+static size_t NumFuncs;  // Number of coverage guards numbered at init time.
+static const uintptr_t *FuncsBeg;  // PC table start; PC of function I is FuncsBeg[I * 2].
+static __thread size_t CurrentFunc;  // Index stored by the last trace_pc_guard call on this thread.
+static dfsan_label *FuncLabels;  // Array of NumFuncs elements; entry I is the union of labels seen in function I.
+
+// Prints all instrumented functions, one symbolized name per line.
+// Returns 0 on success and 1 if the PC table is missing or the symbolizer
+// pipeline cannot be started or closed.
+int PrintFunctions() {
+  // FuncsBeg is only set by __sanitizer_cov_pcs_init; without it there are
+  // no PCs to symbolize.
+  if (NumFuncs && !FuncsBeg) {
+    fprintf(stderr, "ERROR: no PC table found; build the target with "
+                    "-fsanitize-coverage=pc-table\n");
+    return 1;
+  }
+  // We don't have the symbolizer integrated with dfsan yet.
+  // So use backtrace_symbols_fd and pipe it through llvm-symbolizer.
+  // TODO(kcc): this is pretty ugly and may break in lots of ways.
+  //      We'll need to make a proper in-process symbolizer work with DFSan.
+  FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' "
+                     "| llvm-symbolizer "
+                     "| grep 'dfs\\$' "
+                     "| sed 's/dfs\\$//g'", "w");
+  if (!Pipe) {
+    fprintf(stderr, "ERROR: failed to start the symbolizer pipeline\n");
+    return 1;
+  }
+  for (size_t I = 0; I < NumFuncs; I++) {
+    // The table is indexed in pairs: the PC of function I is at I * 2.
+    uintptr_t PC = FuncsBeg[I * 2];
+    void *const Buf[1] = {(void*)PC};
+    backtrace_symbols_fd(Buf, 1, fileno(Pipe));
+  }
+  if (pclose(Pipe) == -1) {
+    fprintf(stderr, "ERROR: failed to close the symbolizer pipeline\n");
+    return 1;
+  }
+  return 0;
+}
+
+// Writes the collected data flow to Out:
+//   * "LEN:" and "LABELS:" header lines,
+//   * one "Li j k" line for every label above InputLen + 1,
+//   * one "Ff l" line for every function with a non-zero label.
+void PrintDataFlow(FILE *Out) {
+  const size_t NumLabels = dfsan_get_label_count();
+  fprintf(Out, "LEN:    %zu\n", InputLen);
+  fprintf(Out, "LABELS: %zu\n", NumLabels);
+  // Count in size_t rather than dfsan_label: a narrower counter would be
+  // truncated on initialization and could wrap around before exceeding
+  // NumLabels, so the loop would never terminate.
+  for (size_t L = InputLen + 2; L <= NumLabels; L++) {
+    auto *DLI = dfsan_get_label_info((dfsan_label)L);
+    fprintf(Out, "L%zu %d %d\n", L, DLI->l1, DLI->l2);
+  }
+  for (size_t I = 0; I < NumFuncs; I++)
+    if (FuncLabels[I])
+      fprintf(Out, "F%zu %d\n", I, FuncLabels[I]);
+}
+
+// Entry point of the tracer.
+//   ./a.out                          prints the instrumented functions.
+//   ./a.out INPUT_FILE [OUTPUT_FILE] runs the fuzz target on INPUT_FILE and
+//                                    writes the data flow to OUTPUT_FILE
+//                                    (stdout when omitted).
+// Returns 0 on success and 1 on a usage or I/O error.
+int main(int argc, char **argv) {
+  if (LLVMFuzzerInitialize)
+    LLVMFuzzerInitialize(&argc, &argv);
+  if (argc == 1)
+    return PrintFunctions();
+  // Report misuse explicitly: an assert would vanish in NDEBUG builds.
+  if (argc != 2 && argc != 3) {
+    fprintf(stderr, "Usage: %s [INPUT_FILE [OUTPUT_FILE]]\n",
+            argc > 0 ? argv[0] : "a.out");
+    return 1;
+  }
+
+  const char *Input = argv[1];
+  fprintf(stderr, "INFO: reading '%s'\n", Input);
+  FILE *In = fopen(Input, "rb");  // The input is arbitrary binary data.
+  if (!In) {
+    fprintf(stderr, "ERROR: cannot open '%s'\n", Input);
+    return 1;
+  }
+  long FileSize = -1;
+  if (fseek(In, 0, SEEK_END) == 0)
+    FileSize = ftell(In);
+  if (FileSize < 0 || fseek(In, 0, SEEK_SET) != 0) {
+    fprintf(stderr, "ERROR: cannot determine the size of '%s'\n", Input);
+    fclose(In);
+    return 1;
+  }
+  InputLen = (size_t)FileSize;
+  // malloc(0) may return NULL, so always request at least one byte.
+  unsigned char *Buf = (unsigned char*)malloc(InputLen ? InputLen : 1);
+  if (!Buf) {
+    fprintf(stderr, "ERROR: cannot allocate %zu bytes\n", InputLen);
+    fclose(In);
+    return 1;
+  }
+  size_t NumBytesRead = fread(Buf, 1, InputLen, In);
+  fclose(In);
+  if (NumBytesRead != InputLen) {
+    fprintf(stderr, "ERROR: short read from '%s'\n", Input);
+    free(Buf);
+    return 1;
+  }
+
+  fprintf(stderr, "INFO: running '%s'\n", Input);
+  // Label I (1-based) is attached to input byte I - 1.
+  for (size_t I = 1; I <= InputLen; I++) {
+    dfsan_label L = dfsan_create_label("", nullptr);
+    assert(L == I);
+    dfsan_set_label(L, Buf + I - 1, 1);
+  }
+  // Label InputLen + 1 is attached to the size variable itself, so that
+  // comparisons on the size are attributed to it.
+  dfsan_label SizeL = dfsan_create_label("", nullptr);
+  assert(SizeL == InputLen + 1);
+  dfsan_set_label(SizeL, &InputLen, sizeof(InputLen));
+
+  LLVMFuzzerTestOneInput(Buf, InputLen);
+  free(Buf);
+
+  bool OutIsStdout = argc == 2;
+  fprintf(stderr, "INFO: writing dataflow to %s\n",
+          OutIsStdout ? "<stdout>" : argv[2]);
+  FILE *Out = OutIsStdout ? stdout : fopen(argv[2], "w");
+  if (!Out) {
+    fprintf(stderr, "ERROR: cannot open '%s' for writing\n", argv[2]);
+    return 1;
+  }
+  PrintDataFlow(Out);
+  // fclose flushes the stream, so a failure here means lost output.
+  if (!OutIsStdout && fclose(Out) != 0) {
+    fprintf(stderr, "ERROR: failed to write '%s'\n", argv[2]);
+    return 1;
+  }
+  return 0;
+}
+
+extern "C" {
+
+// trace-pc-guard initialization callback: receives the [start, stop) range
+// of guards. Numbers the guards starting from 1, stores their count in
+// NumFuncs and allocates the zero-initialized FuncLabels array.
+// Aborts if the array cannot be allocated.
+void __sanitizer_cov_trace_pc_guard_init(uint32_t *start,
+                                         uint32_t *stop) {
+  assert(NumFuncs == 0 && "This tool does not support DSOs");
+  assert(start < stop && "The code is not instrumented for coverage");
+  if (start == stop || *start) return;  // Initialize only once.
+  for (uint32_t *x = start; x < stop; x++)
+    *x = ++NumFuncs;  // The first index is 1.
+  FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label));
+  if (!FuncLabels) {
+    // The comparison hooks index FuncLabels unconditionally, so there is no
+    // way to continue without it.
+    fprintf(stderr, "ERROR: cannot allocate labels for %zu function(s)\n",
+            NumFuncs);
+    abort();
+  }
+  fprintf(stderr, "INFO: %zu instrumented function(s) observed\n", NumFuncs);
+}
+
+void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
+                              const uintptr_t *pcs_end) {
+  assert(NumFuncs == (pcs_end - pcs_beg) / 2);  // Two table words per guard.
+  FuncsBeg = pcs_beg;  // PrintFunctions reads FuncsBeg[I * 2] for function I.
+}
+
+void __sanitizer_cov_trace_pc_indir(uint64_t x){}  // Intentionally empty: this tool ignores the argument.
+
+// Guard callback: remembers, per thread, which function the guard belongs to
+// so that the comparison hooks can attribute labels to it.
+void __sanitizer_cov_trace_pc_guard(uint32_t *guard) {
+  // Guard values were numbered from 1; convert to a zero-based index.
+  const uint32_t Index = *guard - 1;
+  assert(Index < NumFuncs);
+  CurrentFunc = Index;
+}
+
+// DFSan wrapper for the switch callback: merges L1 (the first label
+// argument) into the label recorded for the current function. Val, Cases and
+// UnusedL are not used.
+void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
+                                         dfsan_label L1, dfsan_label UnusedL) {
+  assert(CurrentFunc < NumFuncs);
+  dfsan_label *Slot = &FuncLabels[CurrentFunc];
+  *Slot = dfsan_union(*Slot, L1);
+}
+
+// Defines a DFSan wrapper for a two-operand comparison callback.
+// The wrapper unions the labels of both operands (L1, L2) and merges the
+// result into the label recorded for the current function. The operand
+// values themselves (Arg1, Arg2) are not inspected.
+#define HOOK(Name, Type)                                                       \
+  void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) {            \
+    assert(CurrentFunc < NumFuncs);                                            \
+    const dfsan_label ArgsLabel = dfsan_union(L1, L2);                         \
+    dfsan_label *Slot = &FuncLabels[CurrentFunc];                              \
+    *Slot = dfsan_union(*Slot, ArgsLabel);                                     \
+  }
+
+// Comparisons against a constant operand.
+HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t)
+HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t)
+HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t)
+HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t)
+// Comparisons between two non-constant operands.
+HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t)
+HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t)
+HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t)
+HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t)
+
+} // extern "C"

Added: compiler-rt/trunk/test/fuzzer/ThreeFunctionsTest.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/fuzzer/ThreeFunctionsTest.cpp?rev=332029&view=auto
==============================================================================
--- compiler-rt/trunk/test/fuzzer/ThreeFunctionsTest.cpp (added)
+++ compiler-rt/trunk/test/fuzzer/ThreeFunctionsTest.cpp Thu May 10 12:59:01 2018
@@ -0,0 +1,34 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Find "FUZZME", the target has 3 different functions.
+#include <assert.h>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstdio>
+
+__attribute__((noinline))
+static bool Func1(const uint8_t *Data, size_t Size) {
+  // Assumes Size >= 5 without checking it; the caller verifies that first.
+  return Data[4] == 'M';
+}
+
+__attribute__((noinline))
+bool Func2(const uint8_t *Data, size_t Size) {
+  return Size >= 6 && Data[5] == 'E';  // Depends on the size and, if large enough, on byte 5.
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size >= 5  // Guards the Data[0..4] reads below and in Func1.
+      && Data[0] == 'F'
+      && Data[1] == 'U'
+      && Data[2] == 'Z'
+      && Data[3] == 'Z'
+      && Func1(Data, Size)  // Data[4] == 'M'
+      && Func2(Data, Size)) {  // Size >= 6 && Data[5] == 'E'
+        fprintf(stderr, "BINGO\n");
+        abort();  // Reached only when the input starts with "FUZZME".
+  }
+  return 0;
+}

Added: compiler-rt/trunk/test/fuzzer/dataflow.test
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/fuzzer/dataflow.test?rev=332029&view=auto
==============================================================================
--- compiler-rt/trunk/test/fuzzer/dataflow.test (added)
+++ compiler-rt/trunk/test/fuzzer/dataflow.test Thu May 10 12:59:01 2018
@@ -0,0 +1,76 @@
+# Tests the data flow tracer.
+REQUIRES: linux
+
+# Build the tracer and the test.
+RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp   %S/ThreeFunctionsTest.cpp -o %t-ThreeFunctionsTest.o
+RUN: %no_fuzzer_cpp_compiler    -fno-sanitize=all -fsanitize=dataflow  %t-ThreeFunctionsTest.o %S/../../lib/fuzzer/dataflow/DataFlow.cpp -o  %t-ThreeFunctionsTestDF
+
+# Dump the function list.
+RUN:  %t-ThreeFunctionsTestDF 2>&1 | FileCheck %s --check-prefix=FUNC_LIST
+FUNC_LIST-DAG: LLVMFuzzerTestOneInput
+FUNC_LIST-DAG: Func1
+FUNC_LIST-DAG: Func2
+
+# Prepare the inputs.
+RUN: rm -rf %t/IN
+RUN: mkdir -p %t/IN
+RUN: echo -n ABC    > %t/IN/ABC
+RUN: echo -n FUABC  > %t/IN/FUABC
+RUN: echo -n FUZZR  > %t/IN/FUZZR
+RUN: echo -n FUZZM  > %t/IN/FUZZM
+RUN: echo -n FUZZMU > %t/IN/FUZZMU
+
+# ABC: No data is used, the only used label is 4 (corresponds to the size)
+RUN:%t-ThreeFunctionsTestDF %t/IN/ABC    | FileCheck %s --check-prefix=IN_ABC
+IN_ABC: LEN: 3
+IN_ABC: LABELS: 4
+IN_ABC: F{{[012]}} 4
+IN_ABC-NOT: F
+
+# FUABC: First 3 bytes are checked, Func1/Func2 are not called.
+RUN:%t-ThreeFunctionsTestDF %t/IN/FUABC  | FileCheck %s --check-prefix=IN_FUABC
+IN_FUABC: LEN: 5
+IN_FUABC: LABELS:
+IN_FUABC: L{{.*}} 1
+IN_FUABC: L{{.*}} 2
+IN_FUABC: L{{.*}} 3
+IN_FUABC-NOT: L{{.*}} 4
+IN_FUABC: F{{[012]}}
+IN_FUABC-NOT: F
+
+# FUZZR: 5 bytes are used (4 in one function, 5-th in the other), Func2 is not called.
+RUN:%t-ThreeFunctionsTestDF %t/IN/FUZZR  | FileCheck %s --check-prefix=IN_FUZZR
+IN_FUZZR: LEN: 5
+IN_FUZZR: LABELS:
+IN_FUZZR: L{{.*}} 1
+IN_FUZZR: L{{.*}} 2
+IN_FUZZR: L{{.*}} 3
+IN_FUZZR: L[[L0:[0-9]*]] 4
+IN_FUZZR-DAG: F{{[012]}} 5
+IN_FUZZR-DAG: F{{[012]}} [[L0]]
+IN_FUZZR-NOT: F
+
+# FUZZM: 5 bytes are used, both Func1 and Func2 are called, Func2 depends only on size (label 6).
+RUN:%t-ThreeFunctionsTestDF %t/IN/FUZZM  | FileCheck %s --check-prefix=IN_FUZZM
+IN_FUZZM: LEN: 5
+IN_FUZZM: LABELS:
+IN_FUZZM: L{{.*}} 1
+IN_FUZZM: L{{.*}} 2
+IN_FUZZM: L{{.*}} 3
+IN_FUZZM: L{{.*}} 4
+IN_FUZZM-DAG: F{{[012]}} 6
+IN_FUZZM-DAG: F{{[012]}} 5
+IN_FUZZM-DAG: F
+
+# FUZZMU: 6 bytes are used, both Func1 and Func2 are called, Func2 depends on byte 6 and size (label 7)
+RUN:%t-ThreeFunctionsTestDF %t/IN/FUZZMU  | FileCheck %s --check-prefix=IN_FUZZMU
+IN_FUZZMU: LEN: 6
+IN_FUZZMU: LABELS:
+IN_FUZZMU: L{{.*}} 1
+IN_FUZZMU: L{{.*}} 2
+IN_FUZZMU: L{{.*}} 3
+IN_FUZZMU: L{{.*}} 4
+IN_FUZZMU: L[[L2:[0-9]*]] 6 7
+IN_FUZZMU-DAG: F{{[012]}} 5
+IN_FUZZMU-DAG: F{{[012]}} [[L2]]
+IN_FUZZMU-DAG: F




More information about the llvm-commits mailing list