[compiler-rt] r337434 - [libFuzzer] first experimental attempt at DFT-based mutations (DFT=data-flow-trace)

Kostya Serebryany via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 18 18:23:32 PDT 2018


Author: kcc
Date: Wed Jul 18 18:23:32 2018
New Revision: 337434

URL: http://llvm.org/viewvc/llvm-project?rev=337434&view=rev
Log:
[libFuzzer] first experimental attempt at DFT-based mutations (DFT=data-flow-trace)

Modified:
    compiler-rt/trunk/lib/fuzzer/FuzzerCorpus.h
    compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.cpp
    compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.h
    compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp
    compiler-rt/trunk/lib/fuzzer/FuzzerMutate.cpp
    compiler-rt/trunk/lib/fuzzer/FuzzerMutate.h
    compiler-rt/trunk/lib/fuzzer/tests/FuzzerUnittest.cpp
    compiler-rt/trunk/test/fuzzer/only-some-bytes.test

Modified: compiler-rt/trunk/lib/fuzzer/FuzzerCorpus.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerCorpus.h?rev=337434&r1=337433&r2=337434&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerCorpus.h (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerCorpus.h Wed Jul 18 18:23:32 2018
@@ -38,7 +38,7 @@ struct InputInfo {
   bool Reduced = false;
   bool HasFocusFunction = false;
   Vector<uint32_t> UniqFeatureSet;
-  Vector<bool> DataFlowTraceForFocusFunction;
+  Vector<uint8_t> DataFlowTraceForFocusFunction;
 };
 
 class InputCorpus {
@@ -88,7 +88,7 @@ class InputCorpus {
   const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
   void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
                    bool HasFocusFunction, const Vector<uint32_t> &FeatureSet,
-                   const DataFlowTrace &DFT) {
+                   const DataFlowTrace &DFT, const InputInfo *BaseII) {
     assert(!U.empty());
     if (FeatureDebug)
       Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures);
@@ -106,6 +106,11 @@ class InputCorpus {
     if (HasFocusFunction)
       if (auto V = DFT.Get(Sha1Str))
         II.DataFlowTraceForFocusFunction = *V;
+    // This is a gross heuristic.
+    // Ideally, when we add an element to a corpus we need to know its DFT.
+    // But if we don't, we'll use the DFT of its base input.
+    if (II.DataFlowTraceForFocusFunction.empty() && BaseII)
+      II.DataFlowTraceForFocusFunction = BaseII->DataFlowTraceForFocusFunction;
     UpdateCorpusDistribution();
     PrintCorpus();
     // ValidateFeatureSet();

Modified: compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.cpp?rev=337434&r1=337433&r2=337434&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.cpp (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.cpp Wed Jul 18 18:23:32 2018
@@ -67,7 +67,7 @@ void DataFlowTrace::Init(const std::stri
         const char *End = L.c_str() + L.size();
         assert(Beg < End);
         size_t Len = End - Beg;
-        Vector<bool> V(Len);
+        Vector<uint8_t> V(Len);
         for (size_t I = 0; I < Len; I++) {
           if (Beg[I] != '0' && Beg[I] != '1')
             ParseError("the trace should contain only 0 or 1");

Modified: compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.h?rev=337434&r1=337433&r2=337434&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.h (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerDataFlowTrace.h Wed Jul 18 18:23:32 2018
@@ -40,7 +40,7 @@ class DataFlowTrace {
  public:
   void Init(const std::string &DirPath, const std::string &FocusFunction);
   void Clear() { Traces.clear(); }
-  const Vector<bool> *Get(const std::string &InputSha1) const {
+  const Vector<uint8_t> *Get(const std::string &InputSha1) const {
     auto It = Traces.find(InputSha1);
     if (It != Traces.end())
       return &It->second;
@@ -49,7 +49,7 @@ class DataFlowTrace {
 
  private:
   // Input's sha1 => DFT for the FocusFunction.
-  std::unordered_map<std::string, Vector<bool> > Traces;
+  std::unordered_map<std::string, Vector<uint8_t> > Traces;
 };
 }  // namespace fuzzer
 

Modified: compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp?rev=337434&r1=337433&r2=337434&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp Wed Jul 18 18:23:32 2018
@@ -503,8 +503,7 @@ bool Fuzzer::RunOne(const uint8_t *Data,
   if (NumNewFeatures) {
     TPC.UpdateObservedPCs();
     Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile,
-                       TPC.ObservedFocusFunction(),
-                       UniqFeatureSetTmp, DFT);
+                       TPC.ObservedFocusFunction(), UniqFeatureSetTmp, DFT, II);
     return true;
   }
   if (II && FoundUniqFeaturesOfII &&
@@ -687,7 +686,12 @@ void Fuzzer::MutateAndTestOne() {
       break;
     MaybeExitGracefully();
     size_t NewSize = 0;
-    NewSize = MD.Mutate(CurrentUnitData, Size, CurrentMaxMutationLen);
+    if (II.HasFocusFunction && !II.DataFlowTraceForFocusFunction.empty() &&
+        Size <= CurrentMaxMutationLen)
+      NewSize = MD.MutateWithMask(CurrentUnitData, Size, Size,
+                                  II.DataFlowTraceForFocusFunction);
+    else
+      NewSize = MD.Mutate(CurrentUnitData, Size, CurrentMaxMutationLen);
     assert(NewSize > 0 && "Mutator returned empty unit");
     assert(NewSize <= CurrentMaxMutationLen && "Mutator return oversized unit");
     Size = NewSize;

Modified: compiler-rt/trunk/lib/fuzzer/FuzzerMutate.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerMutate.cpp?rev=337434&r1=337433&r2=337434&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerMutate.cpp (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerMutate.cpp Wed Jul 18 18:23:32 2018
@@ -529,6 +529,33 @@ size_t MutationDispatcher::MutateImpl(ui
   return 1;   // Fallback, should not happen frequently.
 }
 
+// Mask selects the mutable bytes: Data[I] may change only if Mask[I] != 0.
+size_t MutationDispatcher::MutateWithMask(uint8_t *Data, size_t Size,
+                                          size_t MaxSize,
+                                          const Vector<uint8_t> &Mask) {
+  assert(Size <= Mask.size());
+  // * Copy the worthy bytes into a temporary array T
+  // * Mutate T
+  // * Copy T back.
+  // This is totally unoptimized.
+  auto &T = MutateWithMaskTemp;
+  if (T.size() < Size)
+    T.resize(Size);
+  size_t OneBits = 0;
+  for (size_t I = 0; I < Size; I++)
+    if (Mask[I])
+      T[OneBits++] = Data[I];
+
+  assert(!T.empty());
+  size_t NewSize = Mutate(T.data(), OneBits, OneBits);
+  assert(NewSize <= OneBits);
+  // Even if NewSize < OneBits we still use all OneBits bytes.
+  for (size_t I = 0, J = 0; I < Size; I++)
+    if (Mask[I])
+      Data[I] = T[J++];
+  return Size;
+}
+
 void MutationDispatcher::AddWordToManualDictionary(const Word &W) {
   ManualDictionary.push_back(
       {W, std::numeric_limits<size_t>::max()});

Modified: compiler-rt/trunk/lib/fuzzer/FuzzerMutate.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerMutate.h?rev=337434&r1=337433&r2=337434&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerMutate.h (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerMutate.h Wed Jul 18 18:23:32 2018
@@ -70,6 +70,13 @@ public:
   /// Applies one of the configured mutations.
   /// Returns the new size of data which could be up to MaxSize.
   size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize);
+
+  /// Applies one of the configured mutations to only those bytes of Data
+  /// whose Mask entry is non-zero; other bytes and the size are unchanged.
+  /// Mask.size() must be >= Size (asserted). MaxSize is currently unused.
+  size_t MutateWithMask(uint8_t *Data, size_t Size, size_t MaxSize,
+                        const Vector<uint8_t> &Mask);
+
   /// Applies one of the default mutations. Provided as a service
   /// to mutation authors.
   size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize);
@@ -142,6 +149,7 @@ public:
 
   const InputCorpus *Corpus = nullptr;
   Vector<uint8_t> MutateInPlaceHere;
+  Vector<uint8_t> MutateWithMaskTemp;
   // CustomCrossOver needs its own buffer as a custom implementation may call
   // LLVMFuzzerMutate, which in turn may resize MutateInPlaceHere.
   Vector<uint8_t> CustomCrossOverInPlaceHere;

Modified: compiler-rt/trunk/lib/fuzzer/tests/FuzzerUnittest.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/tests/FuzzerUnittest.cpp?rev=337434&r1=337433&r2=337434&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/tests/FuzzerUnittest.cpp (original)
+++ compiler-rt/trunk/lib/fuzzer/tests/FuzzerUnittest.cpp Wed Jul 18 18:23:32 2018
@@ -588,7 +588,8 @@ TEST(Corpus, Distribution) {
   size_t N = 10;
   size_t TriesPerUnit = 1<<16;
   for (size_t i = 0; i < N; i++)
-    C->AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 1, false, false, {}, DFT);
+    C->AddToCorpus(Unit{static_cast<uint8_t>(i)}, 1, false, false, {}, DFT,
+                   nullptr);
 
   Vector<size_t> Hist(N);
   for (size_t i = 0; i < N * TriesPerUnit; i++) {

Modified: compiler-rt/trunk/test/fuzzer/only-some-bytes.test
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/fuzzer/only-some-bytes.test?rev=337434&r1=337433&r2=337434&view=diff
==============================================================================
--- compiler-rt/trunk/test/fuzzer/only-some-bytes.test (original)
+++ compiler-rt/trunk/test/fuzzer/only-some-bytes.test Wed Jul 18 18:23:32 2018
@@ -7,7 +7,7 @@ RUN: %no_fuzzer_cpp_compiler    -fno-san
 RUN: %cpp_compiler %S/OnlySomeBytesTest.cpp -o %t-Fuzz
 
 # Prepare the inputs.
-RUN: rm -rf %t/IN
+RUN: rm -rf %t/*
 RUN: mkdir -p %t/IN
 RUN: echo -n 0123456789012345678901234567890123456789012345678901234567891234 > %t/IN/6
 RUN: cat  %t/IN/6 %t/IN/6 %t/IN/6 %t/IN/6 > %t/IN/8
@@ -22,10 +22,16 @@ RUN: (echo -n ABC; cat %t/IN/12) > %t/IN
 RUN: %t-Fuzz -focus_function=f0 -runs=0 %t/IN 2>&1 | FileCheck %s --check-prefix=ONE_FOCUSED_INPUT
 ONE_FOCUSED_INPUT: INFO: 1/3 inputs touch the focus function
 
-RUN: rm -rf %t/DFT_DIR
-RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-DFT %t/IN %t/DFT_DIR
+RUN: rm -rf %t/IN_DFT
+RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-DFT %t/IN %t/IN_DFT > /dev/null 2>&1
 
-# Repat twice to make sure that the inputs with DFT are not removed from the corpus.
-RUN: %t-Fuzz -focus_function=f0 -data_flow_trace=%t/DFT_DIR -runs=100 %t/IN 2>&1 | FileCheck %s --check-prefix=HAVE_DFT
-RUN: %t-Fuzz -focus_function=f0 -data_flow_trace=%t/DFT_DIR -runs=100 %t/IN 2>&1 | FileCheck %s --check-prefix=HAVE_DFT
+# Repeat twice to make sure that the inputs with DFT are not removed from the corpus.
+RUN: %t-Fuzz -focus_function=f0 -data_flow_trace=%t/IN_DFT -runs=100 %t/IN 2>&1 | FileCheck %s --check-prefix=HAVE_DFT
+RUN: %t-Fuzz -focus_function=f0 -data_flow_trace=%t/IN_DFT -runs=100 %t/IN 2>&1 | FileCheck %s --check-prefix=HAVE_DFT
 HAVE_DFT: INFO: 1/{{.*}} inputs have the Data Flow Trace
+
+# Collect DFT, then use it.
+RUN: rm -rf %t/C  && mkdir %t/C &&  cp %t/IN/* %t/C
+RUN: rm -rf %t/C_DFT && %libfuzzer_src/scripts/collect_data_flow.py %t-DFT %t/C %t/C_DFT > /dev/null 2>&1
+RUN: not %t-Fuzz -focus_function=f0 -data_flow_trace=%t/C_DFT -seed=1 -runs=1000000 -use_value_profile=3 %t/C 2> %t/log
+RUN: grep BINGO %t/log




More information about the llvm-commits mailing list