[compiler-rt] r360213 - [libFuzzer] extend the data flow tracer to also produce basic block coverage for every input. An extended test coming in a separate change.

Tue May 7 17:51:16 PDT 2019

Author: kcc
Date: Tue May  7 17:51:15 2019
New Revision: 360213

URL: http://llvm.org/viewvc/llvm-project?rev=360213&view=rev
Log:
[libFuzzer] extend the data flow tracer to also produce basic block coverage for every input. An extended test coming in a separate change.

Modified:
    compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp
    compiler-rt/trunk/lib/fuzzer/scripts/merge_data_flow.py
    compiler-rt/trunk/test/fuzzer/ThreeFunctionsTest.cpp
    compiler-rt/trunk/test/fuzzer/dataflow.test
    compiler-rt/trunk/test/fuzzer/simple-cmp.test

Modified: compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp?rev=360213&r1=360212&r2=360213&view=diff
==============================================================================

--- compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp (original)
+++ compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp Tue May  7 17:51:15 2019
@@ -13,12 +13,13 @@
 // It executes the fuzz target on the given input while monitoring the
 // data flow for every instrumented comparison instruction.
 //
-// The output shows which functions depend on which bytes of the input.
+// The output shows which functions depend on which bytes of the input,
+// and also provides basic-block coverage for every input.
 //
 // Build:
 //   1. Compile this file with -fsanitize=dataflow
 //   2. Build the fuzz target with -g -fsanitize=dataflow
-//       -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp
+//       -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp
 //   3. Link those together with -fsanitize=dataflow
 //
 //  -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
@@ -26,13 +27,15 @@
 //  The callbacks update the data flow label for the current function.
 //  See e.g. __dfsw___sanitizer_cov_trace_cmp1 below.
 //
-//  -fsanitize-coverage=trace-pc-guard,pc-table,func instruments function
+//  -fsanitize-coverage=trace-pc-guard,pc-table,bb instruments function
 //  entries so that the comparison callback knows that current function.
+//  -fsanitize-coverage=...,bb also allows to collect basic block coverage.
 //
 //
 // Run:
-//   # Collect data flow for INPUT_FILE, write to OUTPUT_FILE (default: stdout)
-//   ./a.out INPUT_FILE [OUTPUT_FILE]
+//   # Collect data flow and coverage for INPUT_FILE
+//   # write to OUTPUT_FILE (default: stdout)
+//   ./a.out FIRST_LABEL LAST_LABEL INPUT_FILE [OUTPUT_FILE]
 //
 //   # Print all instrumented functions. llvm-symbolizer must be present in PATH
 //   ./a.out
@@ -41,10 +44,15 @@
 // ===============
 //  F0 11111111111111
 //  F1 10000000000000
+//  C0 1 2 3 4
+//  C1
 //  ===============
 // "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on.
 //    The byte string is LEN+1 bytes. The last byte is set if the function
 //    depends on the input length.
+// "CN X Y Z": tells that a function N has basic blocks X, Y, and Z covered
+//    in addition to the function's entry block.
+//
 //===----------------------------------------------------------------------===*/
 
 #include <assert.h>
@@ -66,13 +74,19 @@ static size_t InputLen;
 static size_t InputLabelBeg;
 static size_t InputLabelEnd;
 static size_t InputSizeLabel;
-static size_t NumFuncs;
-static const uintptr_t *FuncsBeg;
+static size_t NumFuncs, NumGuards;
+static uint32_t *GuardsBeg, *GuardsEnd;
+static const uintptr_t *PCsBeg, *PCsEnd;
 static __thread size_t CurrentFunc;
 static dfsan_label *FuncLabels;  // Array of NumFuncs elements.
+static bool *BBExecuted;  // Array of NumGuards elements.
 static char *PrintableStringForLabel;  // InputLen + 2 bytes.
 static bool LabelSeen[1 << 8 * sizeof(dfsan_label)];
 
+enum {
+  PCFLAG_FUNC_ENTRY = 1,
+};
+
 // Prints all instrumented functions.
 static int PrintFunctions() {
   // We don't have the symbolizer integrated with dfsan yet.
@@ -83,8 +97,10 @@ static int PrintFunctions() {
                      "| llvm-symbolizer "
                      "| grep 'dfs\\$' "
                      "| sed 's/dfs\\$//g'", "w");
-  for (size_t I = 0; I < NumFuncs; I++) {
-    uintptr_t PC = FuncsBeg[I * 2];
+  for (size_t I = 0; I < NumGuards; I++) {
+    uintptr_t PC = PCsBeg[I * 2];
+    uintptr_t PCFlags = PCsBeg[I * 2 + 1];
+    if (!(PCFlags & PCFLAG_FUNC_ENTRY)) continue;
     void *const Buf[1] = {(void*)PC};
     backtrace_symbols_fd(Buf, 1, fileno(Pipe));
   }
@@ -123,6 +139,28 @@ static void PrintDataFlow(FILE *Out) {
       fprintf(Out, "F%zd %s\n", I, GetPrintableStringForLabel(FuncLabels[I]));
 }
 
+static void PrintCoverage(FILE *Out) {
+  ssize_t CurrentFuncGuard = -1;
+  ssize_t CurrentFuncNum = -1;
+  int NumFuncsCovered = 0;
+  for (size_t I = 0; I < NumGuards; I++) {
+    bool IsEntry = PCsBeg[I * 2 + 1] & PCFLAG_FUNC_ENTRY;
+    if (IsEntry) {
+      CurrentFuncNum++;
+      CurrentFuncGuard = I;
+    }
+    if (!BBExecuted[I]) continue;
+    if (IsEntry) {
+      if (NumFuncsCovered) fprintf(Out, "\n");
+      fprintf(Out, "C%zd ", CurrentFuncNum);
+      NumFuncsCovered++;
+    } else {
+      fprintf(Out, "%zd ", I - CurrentFuncGuard);
+    }
+  }
+  fprintf(Out, "\n");
+}
+
 int main(int argc, char **argv) {
   if (LLVMFuzzerInitialize)
     LLVMFuzzerInitialize(&argc, &argv);
@@ -168,6 +206,7 @@ int main(int argc, char **argv) {
           OutIsStdout ? "<stdout>" : argv[4]);
   FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w");
   PrintDataFlow(Out);
+  PrintCoverage(Out);
   if (!OutIsStdout) fclose(Out);
 }
 
@@ -178,21 +217,36 @@ void __sanitizer_cov_trace_pc_guard_init
   assert(NumFuncs == 0 && "This tool does not support DSOs");
   assert(start < stop && "The code is not instrumented for coverage");
   if (start == stop || *start) return;  // Initialize only once.
-  for (uint32_t *x = start; x < stop; x++)
-    *x = ++NumFuncs;  // The first index is 1.
-  FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label));
-  fprintf(stderr, "INFO: %zd instrumented function(s) observed\n", NumFuncs);
+  GuardsBeg = start;
+  GuardsEnd = stop;
 }
 
 void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
                               const uintptr_t *pcs_end) {
-  assert(NumFuncs == (pcs_end - pcs_beg) / 2);
-  FuncsBeg = pcs_beg;
+  if (NumGuards) return;  // Initialize only once.
+  NumGuards = GuardsEnd - GuardsBeg;
+  PCsBeg = pcs_beg;
+  PCsEnd = pcs_end;
+  assert(NumGuards == (PCsEnd - PCsBeg) / 2);
+  for (size_t i = 0; i < NumGuards; i++) {
+    if (PCsBeg[i * 2 + 1] & PCFLAG_FUNC_ENTRY) {
+      NumFuncs++;
+      GuardsBeg[i] = NumFuncs;
+    }
+  }
+  FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label));
+  BBExecuted = (bool*)calloc(NumGuards, sizeof(bool));
+  fprintf(stderr, "INFO: %zd instrumented function(s) observed "
+          "and %zd basic blocks\n", NumFuncs, NumGuards);
 }
 
 void __sanitizer_cov_trace_pc_indir(uint64_t x){}  // unused.
 
-void __sanitizer_cov_trace_pc_guard(uint32_t *guard){
+void __sanitizer_cov_trace_pc_guard(uint32_t *guard) {
+  size_t GuardIdx = guard - GuardsBeg;
+  assert(GuardIdx < NumGuards);
+  BBExecuted[GuardIdx] = true;
+  if (!*guard) return;  // not a function entry.
   uint32_t FuncNum = *guard - 1;  // Guards start from 1.
   assert(FuncNum < NumFuncs);
   CurrentFunc = FuncNum;

Modified: compiler-rt/trunk/lib/fuzzer/scripts/merge_data_flow.py
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/scripts/merge_data_flow.py?rev=360213&r1=360212&r2=360213&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/scripts/merge_data_flow.py (original)
+++ compiler-rt/trunk/lib/fuzzer/scripts/merge_data_flow.py Tue May  7 17:51:15 2019
@@ -23,6 +23,8 @@ def Merge(a, b):
 def main(argv):
   D = {}
   for line in fileinput.input():
+    if line.startswith('C'):
+      continue
     [F,BV] = line.strip().split(' ')
     if F in D:
       D[F] = Merge(D[F], BV)

Modified: compiler-rt/trunk/test/fuzzer/ThreeFunctionsTest.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/fuzzer/ThreeFunctionsTest.cpp?rev=360213&r1=360212&r2=360213&view=diff
==============================================================================
--- compiler-rt/trunk/test/fuzzer/ThreeFunctionsTest.cpp (original)
+++ compiler-rt/trunk/test/fuzzer/ThreeFunctionsTest.cpp Tue May  7 17:51:15 2019
@@ -3,24 +3,21 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 // Find "FUZZME", the target has 3 different functions.
+//
+// This test, and the accompanying lit tests
+// (dataflow.test, only-some-bytes.test) assume that the compiler
+// instruments functions in their lexical order.
+// This assumption is not guaranteed, but it is likely to hold.
+// It allows to simplify the test quite a bit: in the lit tests
+// LLVMFuzzerTestOneInput is "F0", Func1 is "F1", Func2 is "F2".
 #include <assert.h>
 #include <cstddef>
 #include <cstdint>
 #include <cstdlib>
 #include <cstdio>
 
-extern "C"
-__attribute__((noinline))
-bool Func1(const uint8_t *Data, size_t Size) {
-  // assumes Size >= 5, doesn't check it.
-  return Data[4] == 'M';
-}
-
-extern "C"
-__attribute__((noinline))
-bool Func2(const uint8_t *Data, size_t Size) {
-  return Size >= 6 && Data[5] == 'E';
-}
+extern "C" bool Func1(const uint8_t *Data, size_t Size);
+extern "C" bool Func2(const uint8_t *Data, size_t Size);
 
 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   if (Size >= 5
@@ -35,3 +32,18 @@ extern "C" int LLVMFuzzerTestOneInput(co
   }
   return 0;
 }
+
+extern "C"
+__attribute__((noinline))
+bool Func1(const uint8_t *Data, size_t Size) {
+  // assumes Size >= 5, doesn't check it.
+  return Data[4] == 'M';
+}
+
+extern "C"
+__attribute__((noinline))
+bool Func2(const uint8_t *Data, size_t Size) {
+  return Size >= 6 && Data[5] == 'E';
+}
+
+

Modified: compiler-rt/trunk/test/fuzzer/dataflow.test
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/fuzzer/dataflow.test?rev=360213&r1=360212&r2=360213&view=diff
==============================================================================
--- compiler-rt/trunk/test/fuzzer/dataflow.test (original)
+++ compiler-rt/trunk/test/fuzzer/dataflow.test Tue May  7 17:51:15 2019
@@ -3,8 +3,8 @@ REQUIRES: linux, x86_64
 
 # Build the tracer and the test.
 RUN: %no_fuzzer_cpp_compiler -c -fno-sanitize=all -fsanitize=dataflow  %S/../../lib/fuzzer/dataflow/DataFlow.cpp -o  %t-DataFlow.o
-RUN: %no_fuzzer_cpp_compiler    -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp   %S/ThreeFunctionsTest.cpp     %t-DataFlow.o -o %t-ThreeFunctionsTestDF
-RUN: %no_fuzzer_cpp_compiler    -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp   %S/ExplodeDFSanLabelsTest.cpp %t-DataFlow.o -o %t-ExplodeDFSanLabelsTestDF
+RUN: %no_fuzzer_cpp_compiler    -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp   %S/ThreeFunctionsTest.cpp     %t-DataFlow.o -o %t-ThreeFunctionsTestDF
+RUN: %no_fuzzer_cpp_compiler    -fno-sanitize=all -fsanitize=dataflow -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp   %S/ExplodeDFSanLabelsTest.cpp %t-DataFlow.o -o %t-ExplodeDFSanLabelsTestDF
 RUN: %cpp_compiler %S/ThreeFunctionsTest.cpp -o %t-ThreeFunctionsTest
 
 # Dump the function list.
@@ -23,27 +23,33 @@ RUN: echo -n FUZZM  > %t/IN/FUZZM
 RUN: echo -n FUZZMU > %t/IN/FUZZMU
 RUN: echo -n 1234567890123456 > %t/IN/1234567890123456
 
+# This test assumes that the functions in ThreeFunctionsTestDF are instrumented
+# in a specific order:
+# LLVMFuzzerTestOneInput: F0
+# Func1: F1
+# Func2: F2
+
 # ABC: No data is used, the only used label is 4 (corresponds to the size)
 RUN:%t-ThreeFunctionsTestDF 0 3 %t/IN/ABC    | FileCheck %s --check-prefix=IN_ABC
-IN_ABC: F{{[012]}} 0001
+IN_ABC: F0 0001
 IN_ABC-NOT: F
 
 # FUABC: First 3 bytes are checked, Func1/Func2 are not called.
 RUN:%t-ThreeFunctionsTestDF 0 5 %t/IN/FUABC  | FileCheck %s --check-prefix=IN_FUABC
-IN_FUABC: F{{[012]}} 111001
+IN_FUABC: F0 111001
 IN_FUABC-NOT: F
 
 # FUZZR: 5 bytes are used (4 in one function, 5-th in the other), Func2 is not called.
 RUN:%t-ThreeFunctionsTestDF 0 5 %t/IN/FUZZR  | FileCheck %s --check-prefix=IN_FUZZR
-IN_FUZZR-DAG: F{{[012]}} 111101
-IN_FUZZR-DAG: F{{[012]}} 000010
+IN_FUZZR: F0 111101
+IN_FUZZR: F1 000010
 IN_FUZZR-NOT: F
 
 # FUZZM: 5 bytes are used, both Func1 and Func2 are called, Func2 depends only on size (label 6).
 RUN:%t-ThreeFunctionsTestDF 0 5 %t/IN/FUZZM  | FileCheck %s --check-prefix=IN_FUZZM
-IN_FUZZM-DAG: F{{[012]}} 000010
-IN_FUZZM-DAG: F{{[012]}} 111101
-IN_FUZZM-DAG: F{{[012]}} 000001
+IN_FUZZM: F0 111101
+IN_FUZZM: F1 000010
+IN_FUZZM: F2 000001
 
 # FUZZMU: 6 bytes are used, both Func1 and Func2 are called, Func2 depends on byte 6 and size (label 7)
 RUN:%t-ThreeFunctionsTestDF 0 6 %t/IN/FUZZMU  | FileCheck %s --check-prefix=IN_FUZZMU
@@ -53,14 +59,14 @@ RUN:rm -f %t-merge-*
 RUN:%t-ThreeFunctionsTestDF 0 2 %t/IN/FUZZMU > %t-merge-1
 RUN:%t-ThreeFunctionsTestDF 2 4 %t/IN/FUZZMU > %t-merge-2
 RUN:%t-ThreeFunctionsTestDF 4 6 %t/IN/FUZZMU > %t-merge-3
-RUN:%libfuzzer_src/scripts/merge_data_flow.py  %t-merge-* | FileCheck %s --check-prefix=IN_FUZZMU
+RUN:%libfuzzer_src/scripts/merge_data_flow.py  %t-merge-* | sort | FileCheck %s --check-prefix=IN_FUZZMU
 
 # Test collect_data_flow
 RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ThreeFunctionsTestDF %t/IN/FUZZMU | FileCheck %s --check-prefix=IN_FUZZMU
 
-IN_FUZZMU-DAG: F{{[012]}} 0000100
-IN_FUZZMU-DAG: F{{[012]}} 1111001
-IN_FUZZMU-DAG: F{{[012]}} 0000011
+IN_FUZZMU: F0 1111001
+IN_FUZZMU: F1 0000100
+IN_FUZZMU: F2 0000011
 
 # A very simple test will cause DFSan to die with "out of labels"
 RUN: not %t-ExplodeDFSanLabelsTestDF 0 16 %t/IN/1234567890123456 2>&1 | FileCheck %s --check-prefix=OUT_OF_LABELS

Modified: compiler-rt/trunk/test/fuzzer/simple-cmp.test
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/fuzzer/simple-cmp.test?rev=360213&r1=360212&r2=360213&view=diff
==============================================================================
--- compiler-rt/trunk/test/fuzzer/simple-cmp.test (original)
+++ compiler-rt/trunk/test/fuzzer/simple-cmp.test Tue May  7 17:51:15 2019
@@ -1,3 +1,4 @@
+REQUIRES: linux, x86_64
 RUN: %cpp_compiler %S/SimpleCmpTest.cpp -o %t-SimpleCmpTest
 
 RUN: not %run %t-SimpleCmpTest -seed=1 -runs=100000000 2>&1 | FileCheck %s