[llvm] r263769 - [libFuzzer] improve -merge functionality

Kostya Serebryany via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 17 17:23:29 PDT 2016


Author: kcc
Date: Thu Mar 17 19:23:29 2016
New Revision: 263769

URL: http://llvm.org/viewvc/llvm-project?rev=263769&view=rev
Log:
[libFuzzer] improve -merge functionality

Modified:
    llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp
    llvm/trunk/lib/Fuzzer/FuzzerFlags.def
    llvm/trunk/lib/Fuzzer/FuzzerInternal.h
    llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp
    llvm/trunk/lib/Fuzzer/test/fuzzer-traces.test
    llvm/trunk/lib/Fuzzer/test/merge.test

Modified: llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp?rev=263769&r1=263768&r2=263769&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp Thu Mar 17 19:23:29 2016
@@ -284,8 +284,7 @@ static int FuzzerDriver(const std::vecto
   Options.UseTraces = Flags.use_traces;
   Options.UseMemcmp = Flags.use_memcmp;
   Options.ShuffleAtStartUp = Flags.shuffle;
-  Options.PreferSmallDuringInitialShuffle =
-      Flags.prefer_small_during_initial_shuffle;
+  Options.PreferSmall = Flags.prefer_small;
   Options.Reload = Flags.reload;
   Options.OnlyASCII = Flags.only_ascii;
   Options.OutputCSV = Flags.output_csv;

Modified: llvm/trunk/lib/Fuzzer/FuzzerFlags.def
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerFlags.def?rev=263769&r1=263768&r2=263769&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerFlags.def (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerFlags.def Thu Mar 17 19:23:29 2016
@@ -21,10 +21,8 @@ FUZZER_FLAG_INT(cross_over, 1, "If 1, cr
 FUZZER_FLAG_INT(mutate_depth, 5,
             "Apply this number of consecutive mutations to each input.")
 FUZZER_FLAG_INT(shuffle, 1, "Shuffle inputs at startup")
-FUZZER_FLAG_INT(
-    prefer_small_during_initial_shuffle, -1,
-    "If 1, always prefer smaller inputs during the initial corpus shuffle."
-    " If 0, never do that. If -1, do it sometimes.")
+FUZZER_FLAG_INT(prefer_small, 1,
+    "If 1, always prefer smaller inputs during the corpus shuffle.")
 FUZZER_FLAG_INT(
     timeout, 1200,
     "Timeout in seconds (if positive). "

Modified: llvm/trunk/lib/Fuzzer/FuzzerInternal.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerInternal.h?rev=263769&r1=263768&r2=263769&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerInternal.h (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerInternal.h Thu Mar 17 19:23:29 2016
@@ -29,6 +29,7 @@
 namespace fuzzer {
 using namespace std::chrono;
 typedef std::vector<uint8_t> Unit;
+typedef std::vector<Unit> UnitVector;
 
 // A simple POD sized array of bytes.
 template <size_t kMaxSize> class FixedWord {
@@ -288,7 +289,7 @@ public:
     bool UseFullCoverageSet = false;
     bool Reload = true;
     bool ShuffleAtStartUp = true;
-    int PreferSmallDuringInitialShuffle = -1;
+    bool PreferSmall = true;
     size_t MaxNumberOfRuns = ULONG_MAX;
     int ReportSlowUnits = 10;
     bool OnlyASCII = false;
@@ -342,6 +343,8 @@ public:
 
   // Merge Corpora[1:] into Corpora[0].
   void Merge(const std::vector<std::string> &Corpora);
+  // Returns a subset of 'Extra' that adds coverage to 'Initial'.
+  UnitVector FindExtraUnits(const UnitVector &Initial, const UnitVector &Extra);
   MutationDispatcher &GetMD() { return MD; }
   void PrintFinalStats();
   void SetMaxLen(size_t MaxLen);
@@ -359,6 +362,8 @@ private:
   void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
   void PrintStats(const char *Where, const char *End = "\n");
   void PrintStatusForNewUnit(const Unit &U);
+  void ShuffleCorpus(UnitVector *V);
+
   // Updates the probability distribution for the units in the corpus.
   // Must be called whenever the corpus or unit weights are changed.
   void UpdateCorpusDistribution();
@@ -367,6 +372,7 @@ private:
   size_t RecordCallerCalleeCoverage();
   void PrepareCoverageBeforeRun();
   bool CheckCoverageAfterRun();
+  void ResetCoverage();
 
   // Trace-based fuzzing: we run a unit with some kind of tracing
   // enabled and record potentially useful mutations. Then

Modified: llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp?rev=263769&r1=263768&r2=263769&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp Thu Mar 17 19:23:29 2016
@@ -243,34 +243,25 @@ void Fuzzer::RereadOutputCorpus(size_t M
   }
 }
 
+void Fuzzer::ShuffleCorpus(UnitVector *V) {
+  std::random_shuffle(V->begin(), V->end(), MD.GetRand());
+  if (Options.PreferSmall)
+    std::stable_sort(V->begin(), V->end(), [](const Unit &A, const Unit &B) {
+      return A.size() < B.size();
+    });
+}
+
 void Fuzzer::ShuffleAndMinimize() {
-  bool PreferSmall = (Options.PreferSmallDuringInitialShuffle == 1 ||
-                      (Options.PreferSmallDuringInitialShuffle == -1 &&
-                       MD.GetRand().RandBool()));
-  if (Options.Verbosity)
-    Printf("INFO: PreferSmall: %d\n", PreferSmall);
   PrintStats("READ  ");
   std::vector<Unit> NewCorpus;
-  if (Options.ShuffleAtStartUp) {
-    std::random_shuffle(Corpus.begin(), Corpus.end(), MD.GetRand());
-    if (PreferSmall)
-      std::stable_sort(
-          Corpus.begin(), Corpus.end(),
-          [](const Unit &A, const Unit &B) { return A.size() < B.size(); });
-  }
-  Unit U;
-  for (const auto &C : Corpus) {
-    for (size_t First = 0; First < 1; First++) {
-      U.clear();
-      size_t Last = std::min(First + Options.MaxLen, C.size());
-      U.insert(U.begin(), C.begin() + First, C.begin() + Last);
-      if (Options.OnlyASCII)
-        ToASCII(U.data(), U.size());
-      if (RunOne(U)) {
-        NewCorpus.push_back(U);
-        if (Options.Verbosity >= 2)
-          Printf("NEW0: %zd L %zd\n", LastRecordedBlockCoverage, U.size());
-      }
+  if (Options.ShuffleAtStartUp)
+    ShuffleCorpus(&Corpus);
+
+  for (const auto &U : Corpus) {
+    if (RunOne(U)) {
+      NewCorpus.push_back(U);
+      if (Options.Verbosity >= 2)
+        Printf("NEW0: %zd L %zd\n", LastRecordedBlockCoverage, U.size());
     }
   }
   Corpus = NewCorpus;
@@ -438,37 +429,65 @@ void Fuzzer::ReportNewCoverage(const Uni
   NumberOfNewUnitsAdded++;
 }
 
+// Finds minimal number of units in 'Extra' that add coverage to 'Initial'.
+// We do it by actually executing the units, sometimes more than once,
+// because we may be using different coverage-like signals and the only
+// common thing between them is that we can say "this unit found new stuff".
+UnitVector Fuzzer::FindExtraUnits(const UnitVector &Initial,
+                                  const UnitVector &Extra) {
+  UnitVector Res = Extra;
+  size_t OldSize = Res.size();
+  for (int Iter = 0; Iter < 10; Iter++) {
+    ShuffleCorpus(&Res);
+    ResetCoverage();
+
+    for (auto &U : Initial)
+      RunOne(U);
+
+    Corpus.clear();
+    for (auto &U : Res)
+      if (RunOne(U))
+        Corpus.push_back(U);
+
+    char Stat[7] = "MIN   ";
+    Stat[3] = '0' + Iter;
+    PrintStats(Stat);
+
+    size_t NewSize = Corpus.size();
+    Res.swap(Corpus);
+
+    if (NewSize == OldSize)
+      break;
+    OldSize = NewSize;
+  }
+  return Res;
+}
+
 void Fuzzer::Merge(const std::vector<std::string> &Corpora) {
   if (Corpora.size() <= 1) {
     Printf("Merge requires two or more corpus dirs\n");
     return;
   }
-  auto InitialCorpusDir = Corpora[0];
-  assert(Options.MaxLen > 0);
-  ReadDir(InitialCorpusDir, nullptr, Options.MaxLen);
-  Printf("Merge: running the initial corpus '%s' of %d units\n",
-         InitialCorpusDir.c_str(), Corpus.size());
-  for (auto &U : Corpus)
-    RunOne(U);
-
   std::vector<std::string> ExtraCorpora(Corpora.begin() + 1, Corpora.end());
 
-  size_t NumTried = 0;
-  size_t NumMerged = 0;
-  for (auto &C : ExtraCorpora) {
-    Corpus.clear();
-    ReadDir(C, nullptr, Options.MaxLen);
-    Printf("Merge: merging the extra corpus '%s' of %zd units\n", C.c_str(),
-           Corpus.size());
-    for (auto &U : Corpus) {
-      NumTried++;
-      if (RunOne(U)) {
-        WriteToOutputCorpus(U);
-        NumMerged++;
-      }
-    }
+  assert(Options.MaxLen > 0);
+  UnitVector Initial, Extra;
+  ReadDirToVectorOfUnits(Corpora[0].c_str(), &Initial, nullptr, Options.MaxLen);
+  for (auto &C : ExtraCorpora)
+    ReadDirToVectorOfUnits(C.c_str(), &Extra, nullptr, Options.MaxLen);
+
+  if (!Initial.empty()) {
+    Printf("=== Minimizing the initial corpus of %zd units\n", Initial.size());
+    Initial = FindExtraUnits({}, Initial);
   }
-  Printf("Merge: written %zd out of %zd units\n", NumMerged, NumTried);
+
+  Printf("=== Merging extra %zd units\n", Extra.size());
+  auto Res = FindExtraUnits(Initial, Extra);
+
+  for (auto &U: Res)
+    WriteToOutputCorpus(U);
+
+  Printf("=== Merge: written %zd units\n", Res.size());
 }
 
 void Fuzzer::MutateAndTestOne() {
@@ -507,6 +526,12 @@ size_t Fuzzer::ChooseUnitIdxToMutate() {
   return Idx;
 }
 
+void Fuzzer::ResetCoverage() {
+  CHECK_WEAK_API_FUNCTION(__sanitizer_reset_coverage);
+  __sanitizer_reset_coverage();
+  CounterBitmap.clear();
+}
+
 // Experimental search heuristic: drilling.
 // - Read, shuffle, execute and minimize the corpus.
 // - Choose one random unit.
@@ -521,8 +546,7 @@ void Fuzzer::Drill() {
 
   Unit U = ChooseUnitToMutate();
 
-  CHECK_WEAK_API_FUNCTION(__sanitizer_reset_coverage);
-  __sanitizer_reset_coverage();
+  ResetCoverage();
 
   std::vector<Unit> SavedCorpus;
   SavedCorpus.swap(Corpus);
@@ -535,7 +559,7 @@ void Fuzzer::Drill() {
   SavedOutputCorpusPath.swap(Options.OutputCorpus);
   Loop();
 
-  __sanitizer_reset_coverage();
+  ResetCoverage();
 
   PrintStats("REINIT");
   SavedOutputCorpusPath.swap(Options.OutputCorpus);

Modified: llvm/trunk/lib/Fuzzer/test/fuzzer-traces.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/test/fuzzer-traces.test?rev=263769&r1=263768&r2=263769&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/test/fuzzer-traces.test (original)
+++ llvm/trunk/lib/Fuzzer/test/fuzzer-traces.test Thu Mar 17 19:23:29 2016
@@ -7,19 +7,19 @@ RUN: not LLVMFuzzer-SimpleCmpTest -use_t
 RUN: not LLVMFuzzer-MemcmpTest               -seed=4294967295 -runs=100000   2>&1 | FileCheck %s
 RUN:     LLVMFuzzer-MemcmpTest -use_memcmp=0 -seed=4294967295 -runs=1000000  2>&1 | FileCheck %s --check-prefix=Done1000000
 
-RUN: not LLVMFuzzer-StrncmpTest               -seed=1 -runs=100000   2>&1 | FileCheck %s
-RUN:     LLVMFuzzer-StrncmpTest -use_memcmp=0 -seed=1 -runs=1000000  2>&1 | FileCheck %s --check-prefix=Done1000000
+RUN: not LLVMFuzzer-StrncmpTest               -seed=2 -runs=100000   2>&1 | FileCheck %s
+RUN:     LLVMFuzzer-StrncmpTest -use_memcmp=0 -seed=3 -runs=1000000  2>&1 | FileCheck %s --check-prefix=Done1000000
 
-RUN: not LLVMFuzzer-StrcmpTest               -seed=1 -runs=200000   2>&1 | FileCheck %s
-RUN:     LLVMFuzzer-StrcmpTest -use_memcmp=0 -seed=1 -runs=1000000  2>&1 | FileCheck %s --check-prefix=Done1000000
+RUN: not LLVMFuzzer-StrcmpTest               -seed=4 -runs=200000   2>&1 | FileCheck %s
+RUN:     LLVMFuzzer-StrcmpTest -use_memcmp=0 -seed=5 -runs=1000000  2>&1 | FileCheck %s --check-prefix=Done1000000
 
-RUN: not LLVMFuzzer-SwitchTest -use_traces=1 -seed=1 -runs=1000002  2>&1 | FileCheck %s
-RUN:     LLVMFuzzer-SwitchTest               -seed=1 -runs=1000000  2>&1 | FileCheck %s --check-prefix=Done1000000
+RUN: not LLVMFuzzer-SwitchTest -use_traces=1 -seed=6 -runs=1000002  2>&1 | FileCheck %s
+RUN:     LLVMFuzzer-SwitchTest               -seed=7 -runs=1000000  2>&1 | FileCheck %s --check-prefix=Done1000000
 
-RUN: not LLVMFuzzer-SimpleHashTest -use_traces=1 -seed=1 -runs=10000000  2>&1 | FileCheck %s
-RUN:     LLVMFuzzer-SimpleHashTest               -seed=1 -runs=10000000 2>&1 | FileCheck %s --check-prefix=Done10000000
+RUN: not LLVMFuzzer-SimpleHashTest -use_traces=1 -seed=8 -runs=10000000  2>&1 | FileCheck %s
+RUN:     LLVMFuzzer-SimpleHashTest               -seed=9 -runs=10000000 2>&1 | FileCheck %s --check-prefix=Done10000000
 
-RUN: LLVMFuzzer-RepeatedMemcmp -seed=1 -runs=100000 2>&1 | FileCheck %s --check-prefix=RECOMMENDED_DICT
+RUN: LLVMFuzzer-RepeatedMemcmp -seed=10 -runs=100000 2>&1 | FileCheck %s --check-prefix=RECOMMENDED_DICT
 RECOMMENDED_DICT:###### Recommended dictionary. ######
 RECOMMENDED_DICT-DAG: "foo"
 RECOMMENDED_DICT-DAG: "bar"

Modified: llvm/trunk/lib/Fuzzer/test/merge.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/test/merge.test?rev=263769&r1=263768&r2=263769&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/test/merge.test (original)
+++ llvm/trunk/lib/Fuzzer/test/merge.test Thu Mar 17 19:23:29 2016
@@ -8,8 +8,8 @@ RUN: echo ..Z... > %tmp/T1/3
 
 # T1 has 3 elements, T2 is empty.
 RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK1
-CHECK1: Merge: running the initial corpus {{.*}} of 3 units
-CHECK1: Merge: written 0 out of 0 units
+CHECK1: === Minimizing the initial corpus of 3 units
+CHECK1: === Merge: written 0 units
 
 RUN: echo ...Z.. > %tmp/T2/1
 RUN: echo ....E. > %tmp/T2/2
@@ -20,10 +20,11 @@ RUN: echo ..Z... > %tmp/T2/c
 
 # T1 has 3 elements, T2 has 6 elements, only 3 are new.
 RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK2
-CHECK2: Merge: running the initial corpus {{.*}} of 3 units
-CHECK2: Merge: written 3 out of 6 units
+CHECK2: === Minimizing the initial corpus of 3 units
+CHECK2: === Merging extra 6 units
+CHECK2: === Merge: written 3 units
 
 # Now, T1 has 6 units and T2 has no new interesting units.
 RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK3
-CHECK3: Merge: running the initial corpus {{.*}} of 6 units
-CHECK3: Merge: written 0 out of 6 units
+CHECK3: === Minimizing the initial corpus of 6 units
+CHECK3: === Merge: written 0 units




More information about the llvm-commits mailing list