[llvm] r261267 - [libFuzzer] only read MaxLen bytes from every file in the corpus to speed up loading the corpus

Kostya Serebryany via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 18 13:49:10 PST 2016


Author: kcc
Date: Thu Feb 18 15:49:10 2016
New Revision: 261267

URL: http://llvm.org/viewvc/llvm-project?rev=261267&view=rev
Log:
[libFuzzer] only read MaxLen bytes from every file in the corpus to speed up loading the corpus

Modified:
    llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp
    llvm/trunk/lib/Fuzzer/FuzzerIO.cpp
    llvm/trunk/lib/Fuzzer/FuzzerInternal.h
    llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp

Modified: llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp?rev=261267&r1=261266&r2=261267&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp Thu Feb 18 15:49:10 2016
@@ -356,7 +356,7 @@ static int FuzzerDriver(const std::vecto
   F.RereadOutputCorpus();
   for (auto &inp : *Inputs)
     if (inp != Options.OutputCorpus)
-      F.ReadDir(inp, nullptr);
+      F.ReadDir(inp, nullptr, Options.MaxLen);
 
   if (F.CorpusSize() == 0)
     F.AddToCorpus(Unit());  // Can't fuzz empty corpus, so add an empty input.

Modified: llvm/trunk/lib/Fuzzer/FuzzerIO.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerIO.cpp?rev=261267&r1=261266&r2=261267&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerIO.cpp (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerIO.cpp Thu Feb 18 15:49:10 2016
@@ -55,12 +55,18 @@ static std::vector<std::string> ListFile
   return V;
 }
 
-Unit FileToVector(const std::string &Path) {
+Unit FileToVector(const std::string &Path, size_t MaxSize) {
   std::ifstream T(Path);
   if (!T) {
     Printf("No such directory: %s; exiting\n", Path.c_str());
     exit(1);
   }
+  if (MaxSize) {
+    Unit Res(MaxSize);
+    T.read(reinterpret_cast<char*>(Res.data()), MaxSize);
+    Res.resize(T.gcount());
+    return Res;
+  }
   return Unit((std::istreambuf_iterator<char>(T)),
               std::istreambuf_iterator<char>());
 }
@@ -84,16 +90,16 @@ void WriteToFile(const Unit &U, const st
 }
 
 void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V,
-                            long *Epoch) {
+                            long *Epoch, size_t MaxSize) {
   long E = Epoch ? *Epoch : 0;
   auto Files = ListFilesInDir(Path, Epoch);
   for (size_t i = 0; i < Files.size(); i++) {
     auto &X = Files[i];
     auto FilePath = DirPlusFile(Path, X);
     if (Epoch && GetEpoch(FilePath) < E) continue;
-    if ((i % 1000) == 0 && i)
+    if ((i & (i - 1)) == 0 && i >= 1024)
       Printf("Loaded %zd/%zd files from %s\n", i, Files.size(), Path);
-    V->push_back(FileToVector(FilePath));
+    V->push_back(FileToVector(FilePath, MaxSize));
   }
 }
 

Modified: llvm/trunk/lib/Fuzzer/FuzzerInternal.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerInternal.h?rev=261267&r1=261266&r2=261267&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerInternal.h (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerInternal.h Thu Feb 18 15:49:10 2016
@@ -65,9 +65,9 @@ typedef FixedWord<27> Word; // 28 bytes.
 
 bool IsFile(const std::string &Path);
 std::string FileToString(const std::string &Path);
-Unit FileToVector(const std::string &Path);
+Unit FileToVector(const std::string &Path, size_t MaxSize = 0);
 void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V,
-                            long *Epoch);
+                            long *Epoch, size_t MaxSize);
 void WriteToFile(const Unit &U, const std::string &Path);
 void CopyFileToErr(const std::string &Path);
 // Returns "Dir/FileName" or equivalent for the current OS.
@@ -302,9 +302,9 @@ public:
   void InitializeTraceState();
   void AssignTaintLabels(uint8_t *Data, size_t Size);
   size_t CorpusSize() const { return Corpus.size(); }
-  void ReadDir(const std::string &Path, long *Epoch) {
+  void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize) {
     Printf("Loading corpus: %s\n", Path.c_str());
-    ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch);
+    ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch, MaxSize);
   }
   void RereadOutputCorpus();
   // Save the current corpus to OutputCorpus.

Modified: llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp?rev=261267&r1=261266&r2=261267&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp Thu Feb 18 15:49:10 2016
@@ -166,7 +166,7 @@ void Fuzzer::RereadOutputCorpus() {
     return;
   std::vector<Unit> AdditionalCorpus;
   ReadDirToVectorOfUnits(Options.OutputCorpus.c_str(), &AdditionalCorpus,
-                         &EpochOfLastReadOfOutputCorpus);
+                         &EpochOfLastReadOfOutputCorpus, Options.MaxLen);
   if (Corpus.empty()) {
     Corpus = AdditionalCorpus;
     return;
@@ -384,7 +384,7 @@ void Fuzzer::Merge(const std::vector<std
     return;
   }
   auto InitialCorpusDir = Corpora[0];
-  ReadDir(InitialCorpusDir, nullptr);
+  ReadDir(InitialCorpusDir, nullptr, Options.MaxLen);
   Printf("Merge: running the initial corpus '%s' of %d units\n",
          InitialCorpusDir.c_str(), Corpus.size());
   for (auto &U : Corpus)
@@ -396,7 +396,7 @@ void Fuzzer::Merge(const std::vector<std
   size_t NumMerged = 0;
   for (auto &C : ExtraCorpora) {
     Corpus.clear();
-    ReadDir(C, nullptr);
+    ReadDir(C, nullptr, Options.MaxLen);
     Printf("Merge: merging the extra corpus '%s' of %zd units\n", C.c_str(),
            Corpus.size());
     for (auto &U : Corpus) {




More information about the llvm-commits mailing list