[compiler-rt] 62673c4 - [libFuzzer] Add an option to keep initial seed inputs around.

Dokyung Song via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 3 08:54:49 PDT 2020


Author: Dokyung Song
Date: 2020-09-03T15:54:39Z
New Revision: 62673c430de43837b0f177089ed184a0ffcd5678

URL: https://github.com/llvm/llvm-project/commit/62673c430de43837b0f177089ed184a0ffcd5678
DIFF: https://github.com/llvm/llvm-project/commit/62673c430de43837b0f177089ed184a0ffcd5678.diff

LOG: [libFuzzer] Add an option to keep initial seed inputs around.

This patch adds an option "keep_seed" to keep all initial seed inputs in the
corpus. Previously, only the initial seed inputs that find new coverage were
added to the corpus, and all the other initial inputs were discarded. We
observed in some circumstances that useful initial seed inputs are discarded as
they find no new coverage, even though they contain useful fragments in them
(e.g., SQLITE3 FuzzBench benchmark). This newly added option provides a way to
keeping seed inputs in the corpus for those circumstances. With this patch, and
with -keep_seed=1, all initial seed inputs are kept in the corpus regardless of
whether they find new coverage or not. Further, these seed inputs are not
replaced with smaller inputs even if -reduce_inputs=1.

Differential Revision: https://reviews.llvm.org/D86577

Added: 
    compiler-rt/test/fuzzer/KeepSeedTest.cpp
    compiler-rt/test/fuzzer/keep-seed.test

Modified: 
    compiler-rt/lib/fuzzer/FuzzerCorpus.h
    compiler-rt/lib/fuzzer/FuzzerDriver.cpp
    compiler-rt/lib/fuzzer/FuzzerFlags.def
    compiler-rt/lib/fuzzer/FuzzerFork.cpp
    compiler-rt/lib/fuzzer/FuzzerInternal.h
    compiler-rt/lib/fuzzer/FuzzerLoop.cpp
    compiler-rt/lib/fuzzer/FuzzerOptions.h
    compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/fuzzer/FuzzerCorpus.h b/compiler-rt/lib/fuzzer/FuzzerCorpus.h
index 54d1e09ec6df..fd8ff6ca427d 100644
--- a/compiler-rt/lib/fuzzer/FuzzerCorpus.h
+++ b/compiler-rt/lib/fuzzer/FuzzerCorpus.h
@@ -33,6 +33,7 @@ struct InputInfo {
   // Stats.
   size_t NumExecutedMutations = 0;
   size_t NumSuccessfullMutations = 0;
+  bool NeverReduce = false;
   bool MayDeleteFile = false;
   bool Reduced = false;
   bool HasFocusFunction = false;
@@ -177,7 +178,7 @@ class InputCorpus {
   bool empty() const { return Inputs.empty(); }
   const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
   InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
-                         bool HasFocusFunction,
+                         bool HasFocusFunction, bool NeverReduce,
                          const Vector<uint32_t> &FeatureSet,
                          const DataFlowTrace &DFT, const InputInfo *BaseII) {
     assert(!U.empty());
@@ -187,6 +188,7 @@ class InputCorpus {
     InputInfo &II = *Inputs.back();
     II.U = U;
     II.NumFeatures = NumFeatures;
+    II.NeverReduce = NeverReduce;
     II.MayDeleteFile = MayDeleteFile;
     II.UniqFeatureSet = FeatureSet;
     II.HasFocusFunction = HasFocusFunction;

diff  --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
index 2615014a0215..a13fb03a7fc1 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
@@ -671,6 +671,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
   Options.Verbosity = Flags.verbosity;
   Options.MaxLen = Flags.max_len;
   Options.LenControl = Flags.len_control;
+  Options.KeepSeed = Flags.keep_seed;
   Options.UnitTimeoutSec = Flags.timeout;
   Options.ErrorExitCode = Flags.error_exitcode;
   Options.TimeoutExitCode = Flags.timeout_exitcode;

diff  --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def
index 811479146603..1dc805d4e539 100644
--- a/compiler-rt/lib/fuzzer/FuzzerFlags.def
+++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def
@@ -23,6 +23,10 @@ FUZZER_FLAG_INT(len_control, 100, "Try generating small inputs first, "
 FUZZER_FLAG_STRING(seed_inputs, "A comma-separated list of input files "
   "to use as an additional seed corpus. Alternatively, an \"@\" followed by "
   "the name of a file containing the comma-separated list.")
+FUZZER_FLAG_INT(keep_seed, 0, "If 1, keep seed inputs in the corpus even if "
+  "they do not produce new coverage. When used with |reduce_inputs==1|, the "
+  "seed inputs will never be reduced. This option can be useful when seeds are"
+  "not properly formed for the fuzz target but still have useful snippets.")
 FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.")
 FUZZER_FLAG_INT(mutate_depth, 5,
             "Apply this number of consecutive mutations to each input.")

diff  --git a/compiler-rt/lib/fuzzer/FuzzerFork.cpp b/compiler-rt/lib/fuzzer/FuzzerFork.cpp
index d9e6b79443e0..84725d22a9c7 100644
--- a/compiler-rt/lib/fuzzer/FuzzerFork.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerFork.cpp
@@ -309,11 +309,15 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
   else
     Env.MainCorpusDir = CorpusDirs[0];
 
-  auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
-  CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
-                      {}, &Env.Cov,
-                      CFPath, false);
-  RemoveFile(CFPath);
+  if (Options.KeepSeed) {
+    for (auto &File : SeedFiles)
+      Env.Files.push_back(File.File);
+  } else {
+    auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
+    CrashResistantMerge(Env.Args, {}, SeedFiles, &Env.Files, {}, &Env.Features,
+                        {}, &Env.Cov, CFPath, false);
+    RemoveFile(CFPath);
+  }
   Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
          Env.Files.size(), Env.TempDir.c_str());
 

diff  --git a/compiler-rt/lib/fuzzer/FuzzerInternal.h b/compiler-rt/lib/fuzzer/FuzzerInternal.h
index 31096ce804bc..2b172d912227 100644
--- a/compiler-rt/lib/fuzzer/FuzzerInternal.h
+++ b/compiler-rt/lib/fuzzer/FuzzerInternal.h
@@ -67,7 +67,8 @@ class Fuzzer {
 
   void ExecuteCallback(const uint8_t *Data, size_t Size);
   bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false,
-              InputInfo *II = nullptr, bool *FoundUniqFeatures = nullptr);
+              InputInfo *II = nullptr, bool ForceAddToCorpus = false,
+              bool *FoundUniqFeatures = nullptr);
 
   // Merge Corpora[1:] into Corpora[0].
   void Merge(const Vector<std::string> &Corpora);

diff  --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
index 02db6d27b0a3..d76075b0a81a 100644
--- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
@@ -464,7 +464,8 @@ static void RenameFeatureSetFile(const std::string &FeaturesDir,
 }
 
 bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
-                    InputInfo *II, bool *FoundUniqFeatures) {
+                    InputInfo *II, bool ForceAddToCorpus,
+                    bool *FoundUniqFeatures) {
   if (!Size)
     return false;
 
@@ -478,7 +479,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
       UniqFeatureSetTmp.push_back(Feature);
     if (Options.Entropic)
       Corpus.UpdateFeatureFrequency(II, Feature);
-    if (Options.ReduceInputs && II)
+    if (Options.ReduceInputs && II && !II->NeverReduce)
       if (std::binary_search(II->UniqFeatureSet.begin(),
                              II->UniqFeatureSet.end(), Feature))
         FoundUniqFeaturesOfII++;
@@ -487,11 +488,12 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
     *FoundUniqFeatures = FoundUniqFeaturesOfII;
   PrintPulseAndReportSlowInput(Data, Size);
   size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore;
-  if (NumNewFeatures) {
+  if (NumNewFeatures || ForceAddToCorpus) {
     TPC.UpdateObservedPCs();
-    auto NewII = Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures,
-                                    MayDeleteFile, TPC.ObservedFocusFunction(),
-                                    UniqFeatureSetTmp, DFT, II);
+    auto NewII =
+        Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile,
+                           TPC.ObservedFocusFunction(), ForceAddToCorpus,
+                           UniqFeatureSetTmp, DFT, II);
     WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1),
                           NewII->UniqFeatureSet);
     return true;
@@ -700,7 +702,7 @@ void Fuzzer::MutateAndTestOne() {
 
     bool FoundUniqFeatures = false;
     bool NewCov = RunOne(CurrentUnitData, Size, /*MayDeleteFile=*/true, &II,
-                         &FoundUniqFeatures);
+                         /*ForceAddToCorpus*/ false, &FoundUniqFeatures);
     TryDetectingAMemoryLeak(CurrentUnitData, Size,
                             /*DuringInitialCorpusExecution*/ false);
     if (NewCov) {
@@ -768,7 +770,9 @@ void Fuzzer::ReadAndExecuteSeedCorpora(Vector<SizedFile> &CorporaFiles) {
     for (auto &SF : CorporaFiles) {
       auto U = FileToVector(SF.File, MaxInputLen, /*ExitOnError=*/false);
       assert(U.size() <= MaxInputLen);
-      RunOne(U.data(), U.size());
+      RunOne(U.data(), U.size(), /*MayDeleteFile*/ false, /*II*/ nullptr,
+             /*ForceAddToCorpus*/ Options.KeepSeed,
+             /*FoundUniqFeatures*/ nullptr);
       CheckExitOnSrcPosOrItem();
       TryDetectingAMemoryLeak(U.data(), U.size(),
                               /*DuringInitialCorpusExecution*/ true);

diff  --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h
index b75e7c7af709..26961973d50b 100644
--- a/compiler-rt/lib/fuzzer/FuzzerOptions.h
+++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h
@@ -18,6 +18,7 @@ struct FuzzingOptions {
   int Verbosity = 1;
   size_t MaxLen = 0;
   size_t LenControl = 1000;
+  bool KeepSeed = false;
   int UnitTimeoutSec = 300;
   int TimeoutExitCode = 70;
   int OOMExitCode = 71;

diff  --git a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp
index 0e9435ab8fcb..93b54f54c19a 100644
--- a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp
+++ b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp
@@ -597,8 +597,10 @@ TEST(Corpus, Distribution) {
   size_t N = 10;
   size_t TriesPerUnit = 1<<16;
   for (size_t i = 0; i < N; i++)
-    C->AddToCorpus(Unit{static_cast<uint8_t>(i)}, 1, false, false, {}, DFT,
-                   nullptr);
+    C->AddToCorpus(Unit{static_cast<uint8_t>(i)}, /*NumFeatures*/ 1,
+                   /*MayDeleteFile*/ false, /*HasFocusFunction*/ false,
+                   /*ForceAddToCorpus*/ false, /*FeatureSet*/ {}, DFT,
+                   /*BaseII*/ nullptr);
 
   Vector<size_t> Hist(N);
   for (size_t i = 0; i < N * TriesPerUnit; i++) {

diff  --git a/compiler-rt/test/fuzzer/KeepSeedTest.cpp b/compiler-rt/test/fuzzer/KeepSeedTest.cpp
new file mode 100644
index 000000000000..f343161abde5
--- /dev/null
+++ b/compiler-rt/test/fuzzer/KeepSeedTest.cpp
@@ -0,0 +1,37 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Test whether the fuzzer can find "SELECT FROM WHERE", given a seed input
+// "SELECTxFROMxWHERE". Without -keep_seed=1, it takes longer time to trigger
+// find the desired string, because the seed input is more likely to be reduced
+// to a prefix of the given input first, losing useful fragments towards the end
+// of the seed input.
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+static volatile int Sink = 0;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size > 17)
+    return 0;
+
+  if (Size >= 6 && Data[0] == 'S' && Data[1] == 'E' && Data[2] == 'L' &&
+      Data[3] == 'E' && Data[4] == 'C' && Data[5] == 'T') {
+    if (Size >= 7 && Data[6] == ' ') {
+      if (Size >= 11 && Data[7] == 'F' && Data[8] == 'R' && Data[9] == 'O' &&
+          Data[10] == 'M') {
+        if (Size >= 12 && Data[11] == ' ') {
+          if (Size >= 17 && Data[12] == 'W' && Data[13] == 'H' &&
+              Data[14] == 'E' && Data[15] == 'R' && Data[16] == 'E') {
+            fprintf(stderr, "BINGO; Found the target, exiting.\n");
+            exit(1);
+          }
+        }
+      }
+    }
+  }
+  return 0;
+}

diff  --git a/compiler-rt/test/fuzzer/keep-seed.test b/compiler-rt/test/fuzzer/keep-seed.test
new file mode 100644
index 000000000000..29212ac7c177
--- /dev/null
+++ b/compiler-rt/test/fuzzer/keep-seed.test
@@ -0,0 +1,17 @@
+REQUIRES: linux, x86_64
+RUN: %cpp_compiler %S/KeepSeedTest.cpp -o %t-KeepSeedTest
+
+RUN: rm -rf %t-corpus
+RUN: mkdir %t-corpus
+RUN: echo -n SELECTxFROMxWHERE > %t-corpus/valid-fragments
+
+RUN: not %run %t-KeepSeedTest -keep_seed=1 -seed=1 -runs=2000000 %t-corpus 2>&1 | FileCheck %s
+CHECK: BINGO
+
+RUN: rm -rf %t-corpus-baseline
+RUN: mkdir %t-corpus-baseline
+RUN: echo -n SELECTxFROMxWHERE > %t-corpus-baseline/valid-fragments
+
+# The following checks whether without -keep_seed=1 libFuzzer does not find the
+# crashing input "SELECT FROM WHERE" even with 2x more runs.
+RUN: %run %t-KeepSeedTest -seed=1 -runs=4000000 %t-corpus-baseline -print_final_stats=1


        


More information about the llvm-commits mailing list