[compiler-rt] r312033 - [libFuzzer] change the way we load the seed corpora: instead of loading all files and then executing all files, load and execute them one-by-one. This should reduce the memory usage in many cases.

Kostya Serebryany via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 29 13:51:25 PDT 2017


Author: kcc
Date: Tue Aug 29 13:51:24 2017
New Revision: 312033

URL: http://llvm.org/viewvc/llvm-project?rev=312033&view=rev
Log:
[libFuzzer] change the way we load the seed corpora: instead of loading all files and then executing all files, load and execute them one-by-one. This should reduce the memory usage in many cases.

Modified:
    compiler-rt/trunk/lib/fuzzer/FuzzerIO.h
    compiler-rt/trunk/lib/fuzzer/FuzzerIOPosix.cpp
    compiler-rt/trunk/lib/fuzzer/FuzzerInternal.h
    compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp
    compiler-rt/trunk/test/fuzzer/fuzzer-dirs.test
    compiler-rt/trunk/test/fuzzer/reduce_inputs.test

Modified: compiler-rt/trunk/lib/fuzzer/FuzzerIO.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerIO.h?rev=312033&r1=312032&r2=312033&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerIO.h (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerIO.h Tue Aug 29 13:51:24 2017
@@ -53,6 +53,7 @@ void RawPrint(const char *Str);
 
 // Platform specific functions:
 bool IsFile(const std::string &Path);
+size_t FileSize(const std::string &Path);
 
 void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
                              Vector<std::string> *V, bool TopDir);

Modified: compiler-rt/trunk/lib/fuzzer/FuzzerIOPosix.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerIOPosix.cpp?rev=312033&r1=312032&r2=312033&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerIOPosix.cpp (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerIOPosix.cpp Tue Aug 29 13:51:24 2017
@@ -32,6 +32,13 @@ bool IsFile(const std::string &Path) {
   return S_ISREG(St.st_mode);
 }
 
+size_t FileSize(const std::string &Path) {
+  struct stat St;
+  if (stat(Path.c_str(), &St))
+    return 0;
+  return St.st_size;
+}
+
 void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
                              Vector<std::string> *V, bool TopDir) {
   auto E = GetEpoch(Dir);

Modified: compiler-rt/trunk/lib/fuzzer/FuzzerInternal.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerInternal.h?rev=312033&r1=312032&r2=312033&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerInternal.h (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerInternal.h Tue Aug 29 13:51:24 2017
@@ -38,7 +38,6 @@ public:
   void Loop(const Vector<std::string> &CorpusDirs);
   void ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs);
   void MinimizeCrashLoop(const Unit &U);
-  void ShuffleAndMinimize(UnitVector *V);
   void RereadOutputCorpus(size_t MaxSize);
 
   size_t secondsSinceProcessStartUp() {
@@ -103,7 +102,6 @@ private:
   void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
   void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0);
   void PrintStatusForNewUnit(const Unit &U, const char *Text);
-  void ShuffleCorpus(UnitVector *V);
   void CheckExitOnSrcPosOrItem();
 
   static void StaticDeathCallback();

Modified: compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp?rev=312033&r1=312032&r2=312033&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp Tue Aug 29 13:51:24 2017
@@ -371,39 +371,6 @@ void Fuzzer::RereadOutputCorpus(size_t M
     PrintStats("RELOAD");
 }
 
-void Fuzzer::ShuffleCorpus(UnitVector *V) {
-  std::shuffle(V->begin(), V->end(), MD.GetRand());
-  if (Options.PreferSmall)
-    std::stable_sort(V->begin(), V->end(), [](const Unit &A, const Unit &B) {
-      return A.size() < B.size();
-    });
-}
-
-void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) {
-  Printf("#0\tREAD units: %zd; rss: %zdMb\n", InitialCorpus->size(),
-         GetPeakRSSMb());
-  if (Options.ShuffleAtStartUp)
-    ShuffleCorpus(InitialCorpus);
-
-  // Test the callback with empty input and never try it again.
-  uint8_t dummy;
-  ExecuteCallback(&dummy, 0);
-
-  for (auto &U : *InitialCorpus) {
-    RunOne(U.data(), U.size());
-    CheckExitOnSrcPosOrItem();
-    TryDetectingAMemoryLeak(U.data(), U.size(),
-                            /*DuringInitialCorpusExecution*/ true);
-    U.clear();
-  }
-  PrintStats("INITED");
-  if (Corpus.empty()) {
-    Printf("ERROR: no interesting inputs were found. "
-           "Is the code instrumented for coverage? Exiting.\n");
-    exit(1);
-  }
-}
-
 void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) {
   auto TimeOfUnit =
       duration_cast<seconds>(UnitStopTime - UnitStartTime).count();
@@ -628,26 +595,68 @@ void Fuzzer::MutateAndTestOne() {
 void Fuzzer::ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs) {
   const size_t kMaxSaneLen = 1 << 20;
   const size_t kMinDefaultLen = 4096;
-  size_t TemporaryMaxLen = Options.MaxLen ? Options.MaxLen : kMaxSaneLen;
-  UnitVector InitialCorpus;
-  for (auto &Inp : CorpusDirs) {
-    Printf("Loading corpus dir: %s\n", Inp.c_str());
-    ReadDirToVectorOfUnits(Inp.c_str(), &InitialCorpus, nullptr,
-                           TemporaryMaxLen, /*ExitOnError=*/false);
-  }
-  if (Options.MaxLen == 0) {
-    size_t MaxLen = 0;
-    for (auto &U : InitialCorpus)
-      MaxLen = std::max(U.size(), MaxLen);
-    SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxLen), kMaxSaneLen));
+  struct SizedFile {
+    std::string File;
+    size_t Size;
+  };
+  Vector<SizedFile> SizedFiles;
+  size_t MaxSize = 0;
+  size_t MinSize = -1;
+  size_t TotalSize = 0;
+  for (auto &Dir : CorpusDirs) {
+    Vector<std::string> Files;
+    ListFilesInDirRecursive(Dir, 0, &Files, /*TopDir*/true);
+    Printf("INFO: % 8zd files found in %s\n", Files.size(), Dir.c_str());
+    for (auto &File : Files) {
+      if (size_t Size = FileSize(File)) {
+        MaxSize = Max(Size, MaxSize);
+        MinSize = Min(Size, MinSize);
+        TotalSize += Size;
+        SizedFiles.push_back({File, Size});
+      }
+    }
+  }
+  if (Options.MaxLen == 0)
+    SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxSize), kMaxSaneLen));
+  assert(MaxInputLen > 0);
+
+  if (SizedFiles.empty()) {
+    Printf("INFO: A corpus is not provided, starting from an empty corpus\n");
+    Unit U({'\n'}); // Valid ASCII input.
+    RunOne(U.data(), U.size());
+  } else {
+    Printf("INFO: seed corpus: files: %zd min: %zdb max: %zdb total: %zdb"
+           " rss: %zdMb\n",
+           SizedFiles.size(), MinSize, MaxSize, TotalSize, GetPeakRSSMb());
+    if (Options.ShuffleAtStartUp)
+      std::shuffle(SizedFiles.begin(), SizedFiles.end(), MD.GetRand());
+
+    if (Options.PreferSmall)
+      std::stable_sort(
+          SizedFiles.begin(), SizedFiles.end(),
+          [](const SizedFile &A, const SizedFile &B) { return A.Size < B.Size; });
+
+    // Load and execute inputs one by one.
+    for (auto &SF : SizedFiles) {
+      auto U = FileToVector(SF.File, MaxInputLen);
+      assert(U.size() <= MaxInputLen);
+      RunOne(U.data(), U.size());
+      CheckExitOnSrcPosOrItem();
+      TryDetectingAMemoryLeak(U.data(), U.size(),
+                              /*DuringInitialCorpusExecution*/ true);
+    }
   }
 
-  if (InitialCorpus.empty()) {
-    InitialCorpus.push_back(Unit({'\n'}));  // Valid ASCII input.
-    if (Options.Verbosity)
-      Printf("INFO: A corpus is not provided, starting from an empty corpus\n");
+  // Test the callback with empty input and never try it again.
+  uint8_t dummy;
+  ExecuteCallback(&dummy, 0);
+
+  PrintStats("INITED");
+  if (Corpus.empty()) {
+    Printf("ERROR: no interesting inputs were found. "
+           "Is the code instrumented for coverage? Exiting.\n");
+    exit(1);
   }
-  ShuffleAndMinimize(&InitialCorpus);
 }
 
 void Fuzzer::Loop(const Vector<std::string> &CorpusDirs) {

Modified: compiler-rt/trunk/test/fuzzer/fuzzer-dirs.test
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/fuzzer/fuzzer-dirs.test?rev=312033&r1=312032&r2=312033&view=diff
==============================================================================
--- compiler-rt/trunk/test/fuzzer/fuzzer-dirs.test (original)
+++ compiler-rt/trunk/test/fuzzer/fuzzer-dirs.test Tue Aug 29 13:51:24 2017
@@ -6,7 +6,7 @@ RUN: echo a > %t/SUB1/a
 RUN: echo b > %t/SUB1/SUB2/b
 RUN: echo c > %t/SUB1/SUB2/SUB3/c
 RUN: %t-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=SUBDIRS
-SUBDIRS: READ   units: 3
+SUBDIRS: INFO: seed corpus: files: 3 min: 2b max: 2b total: 6b
 RUN: echo -n zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/f64
 RUN: cat %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 > %t/SUB1/f256
 RUN: cat %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 > %t/SUB1/f1024

Modified: compiler-rt/trunk/test/fuzzer/reduce_inputs.test
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/fuzzer/reduce_inputs.test?rev=312033&r1=312032&r2=312033&view=diff
==============================================================================
--- compiler-rt/trunk/test/fuzzer/reduce_inputs.test (original)
+++ compiler-rt/trunk/test/fuzzer/reduce_inputs.test Tue Aug 29 13:51:24 2017
@@ -9,7 +9,7 @@ CHECK: INFO: found item with checksum '0
 
 # Test that reduce_inputs deletes redundant files in the corpus.
 RUN: %t-ShrinkControlFlowSimpleTest -runs=0 %t/C 2>&1 | FileCheck %s --check-prefix=COUNT
-COUNT: READ units: 4
+COUNT: seed corpus: files: 4
 
 # a bit longer test
 RUN: %t-ShrinkControlFlowTest  -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60  -seed=1 -runs=1000000  2>&1 | FileCheck %s




More information about the llvm-commits mailing list