[compiler-rt] r312033 - [libFuzzer] change the way we load the seed corpora: instead of loading all files and then executing all files, load and execute them one-by-one. This should reduce the memory usage in many cases.
Kostya Serebryany via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 29 13:51:25 PDT 2017
Author: kcc
Date: Tue Aug 29 13:51:24 2017
New Revision: 312033
URL: http://llvm.org/viewvc/llvm-project?rev=312033&view=rev
Log:
[libFuzzer] change the way we load the seed corpora: instead of loading all files and then executing all files, load and execute them one-by-one. This should reduce the memory usage in many cases.
Modified:
compiler-rt/trunk/lib/fuzzer/FuzzerIO.h
compiler-rt/trunk/lib/fuzzer/FuzzerIOPosix.cpp
compiler-rt/trunk/lib/fuzzer/FuzzerInternal.h
compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp
compiler-rt/trunk/test/fuzzer/fuzzer-dirs.test
compiler-rt/trunk/test/fuzzer/reduce_inputs.test
Modified: compiler-rt/trunk/lib/fuzzer/FuzzerIO.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerIO.h?rev=312033&r1=312032&r2=312033&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerIO.h (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerIO.h Tue Aug 29 13:51:24 2017
@@ -53,6 +53,7 @@ void RawPrint(const char *Str);
// Platform specific functions:
bool IsFile(const std::string &Path);
+size_t FileSize(const std::string &Path);
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
Vector<std::string> *V, bool TopDir);
Modified: compiler-rt/trunk/lib/fuzzer/FuzzerIOPosix.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerIOPosix.cpp?rev=312033&r1=312032&r2=312033&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerIOPosix.cpp (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerIOPosix.cpp Tue Aug 29 13:51:24 2017
@@ -32,6 +32,13 @@ bool IsFile(const std::string &Path) {
return S_ISREG(St.st_mode);
}
+size_t FileSize(const std::string &Path) {
+ struct stat St;
+ if (stat(Path.c_str(), &St))
+ return 0;
+ return St.st_size;
+}
+
void ListFilesInDirRecursive(const std::string &Dir, long *Epoch,
Vector<std::string> *V, bool TopDir) {
auto E = GetEpoch(Dir);
Modified: compiler-rt/trunk/lib/fuzzer/FuzzerInternal.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerInternal.h?rev=312033&r1=312032&r2=312033&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerInternal.h (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerInternal.h Tue Aug 29 13:51:24 2017
@@ -38,7 +38,6 @@ public:
void Loop(const Vector<std::string> &CorpusDirs);
void ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs);
void MinimizeCrashLoop(const Unit &U);
- void ShuffleAndMinimize(UnitVector *V);
void RereadOutputCorpus(size_t MaxSize);
size_t secondsSinceProcessStartUp() {
@@ -103,7 +102,6 @@ private:
void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0);
void PrintStatusForNewUnit(const Unit &U, const char *Text);
- void ShuffleCorpus(UnitVector *V);
void CheckExitOnSrcPosOrItem();
static void StaticDeathCallback();
Modified: compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp?rev=312033&r1=312032&r2=312033&view=diff
==============================================================================
--- compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp (original)
+++ compiler-rt/trunk/lib/fuzzer/FuzzerLoop.cpp Tue Aug 29 13:51:24 2017
@@ -371,39 +371,6 @@ void Fuzzer::RereadOutputCorpus(size_t M
PrintStats("RELOAD");
}
-void Fuzzer::ShuffleCorpus(UnitVector *V) {
- std::shuffle(V->begin(), V->end(), MD.GetRand());
- if (Options.PreferSmall)
- std::stable_sort(V->begin(), V->end(), [](const Unit &A, const Unit &B) {
- return A.size() < B.size();
- });
-}
-
-void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) {
- Printf("#0\tREAD units: %zd; rss: %zdMb\n", InitialCorpus->size(),
- GetPeakRSSMb());
- if (Options.ShuffleAtStartUp)
- ShuffleCorpus(InitialCorpus);
-
- // Test the callback with empty input and never try it again.
- uint8_t dummy;
- ExecuteCallback(&dummy, 0);
-
- for (auto &U : *InitialCorpus) {
- RunOne(U.data(), U.size());
- CheckExitOnSrcPosOrItem();
- TryDetectingAMemoryLeak(U.data(), U.size(),
- /*DuringInitialCorpusExecution*/ true);
- U.clear();
- }
- PrintStats("INITED");
- if (Corpus.empty()) {
- Printf("ERROR: no interesting inputs were found. "
- "Is the code instrumented for coverage? Exiting.\n");
- exit(1);
- }
-}
-
void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) {
auto TimeOfUnit =
duration_cast<seconds>(UnitStopTime - UnitStartTime).count();
@@ -628,26 +595,68 @@ void Fuzzer::MutateAndTestOne() {
void Fuzzer::ReadAndExecuteSeedCorpora(const Vector<std::string> &CorpusDirs) {
const size_t kMaxSaneLen = 1 << 20;
const size_t kMinDefaultLen = 4096;
- size_t TemporaryMaxLen = Options.MaxLen ? Options.MaxLen : kMaxSaneLen;
- UnitVector InitialCorpus;
- for (auto &Inp : CorpusDirs) {
- Printf("Loading corpus dir: %s\n", Inp.c_str());
- ReadDirToVectorOfUnits(Inp.c_str(), &InitialCorpus, nullptr,
- TemporaryMaxLen, /*ExitOnError=*/false);
- }
- if (Options.MaxLen == 0) {
- size_t MaxLen = 0;
- for (auto &U : InitialCorpus)
- MaxLen = std::max(U.size(), MaxLen);
- SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxLen), kMaxSaneLen));
+ struct SizedFile {
+ std::string File;
+ size_t Size;
+ };
+ Vector<SizedFile> SizedFiles;
+ size_t MaxSize = 0;
+ size_t MinSize = -1;
+ size_t TotalSize = 0;
+ for (auto &Dir : CorpusDirs) {
+ Vector<std::string> Files;
+ ListFilesInDirRecursive(Dir, 0, &Files, /*TopDir*/true);
+ Printf("INFO: % 8zd files found in %s\n", Files.size(), Dir.c_str());
+ for (auto &File : Files) {
+ if (size_t Size = FileSize(File)) {
+ MaxSize = Max(Size, MaxSize);
+ MinSize = Min(Size, MinSize);
+ TotalSize += Size;
+ SizedFiles.push_back({File, Size});
+ }
+ }
+ }
+ if (Options.MaxLen == 0)
+ SetMaxInputLen(std::min(std::max(kMinDefaultLen, MaxSize), kMaxSaneLen));
+ assert(MaxInputLen > 0);
+
+ if (SizedFiles.empty()) {
+ Printf("INFO: A corpus is not provided, starting from an empty corpus\n");
+ Unit U({'\n'}); // Valid ASCII input.
+ RunOne(U.data(), U.size());
+ } else {
+ Printf("INFO: seed corpus: files: %zd min: %zdb max: %zdb total: %zdb"
+ " rss: %zdMb\n",
+ SizedFiles.size(), MinSize, MaxSize, TotalSize, GetPeakRSSMb());
+ if (Options.ShuffleAtStartUp)
+ std::shuffle(SizedFiles.begin(), SizedFiles.end(), MD.GetRand());
+
+ if (Options.PreferSmall)
+ std::stable_sort(
+ SizedFiles.begin(), SizedFiles.end(),
+ [](const SizedFile &A, const SizedFile &B) { return A.Size < B.Size; });
+
+ // Load and execute inputs one by one.
+ for (auto &SF : SizedFiles) {
+ auto U = FileToVector(SF.File, MaxInputLen);
+ assert(U.size() <= MaxInputLen);
+ RunOne(U.data(), U.size());
+ CheckExitOnSrcPosOrItem();
+ TryDetectingAMemoryLeak(U.data(), U.size(),
+ /*DuringInitialCorpusExecution*/ true);
+ }
}
- if (InitialCorpus.empty()) {
- InitialCorpus.push_back(Unit({'\n'})); // Valid ASCII input.
- if (Options.Verbosity)
- Printf("INFO: A corpus is not provided, starting from an empty corpus\n");
+ // Test the callback with empty input and never try it again.
+ uint8_t dummy;
+ ExecuteCallback(&dummy, 0);
+
+ PrintStats("INITED");
+ if (Corpus.empty()) {
+ Printf("ERROR: no interesting inputs were found. "
+ "Is the code instrumented for coverage? Exiting.\n");
+ exit(1);
}
- ShuffleAndMinimize(&InitialCorpus);
}
void Fuzzer::Loop(const Vector<std::string> &CorpusDirs) {
Modified: compiler-rt/trunk/test/fuzzer/fuzzer-dirs.test
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/fuzzer/fuzzer-dirs.test?rev=312033&r1=312032&r2=312033&view=diff
==============================================================================
--- compiler-rt/trunk/test/fuzzer/fuzzer-dirs.test (original)
+++ compiler-rt/trunk/test/fuzzer/fuzzer-dirs.test Tue Aug 29 13:51:24 2017
@@ -6,7 +6,7 @@ RUN: echo a > %t/SUB1/a
RUN: echo b > %t/SUB1/SUB2/b
RUN: echo c > %t/SUB1/SUB2/SUB3/c
RUN: %t-SimpleTest %t/SUB1 -runs=0 2>&1 | FileCheck %s --check-prefix=SUBDIRS
-SUBDIRS: READ units: 3
+SUBDIRS: INFO: seed corpus: files: 3 min: 2b max: 2b total: 6b
RUN: echo -n zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz > %t/SUB1/f64
RUN: cat %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 %t/SUB1/f64 > %t/SUB1/f256
RUN: cat %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 %t/SUB1/f256 > %t/SUB1/f1024
Modified: compiler-rt/trunk/test/fuzzer/reduce_inputs.test
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/fuzzer/reduce_inputs.test?rev=312033&r1=312032&r2=312033&view=diff
==============================================================================
--- compiler-rt/trunk/test/fuzzer/reduce_inputs.test (original)
+++ compiler-rt/trunk/test/fuzzer/reduce_inputs.test Tue Aug 29 13:51:24 2017
@@ -9,7 +9,7 @@ CHECK: INFO: found item with checksum '0
# Test that reduce_inputs deletes redundant files in the corpus.
RUN: %t-ShrinkControlFlowSimpleTest -runs=0 %t/C 2>&1 | FileCheck %s --check-prefix=COUNT
-COUNT: READ units: 4
+COUNT: seed corpus: files: 4
# a bit longer test
RUN: %t-ShrinkControlFlowTest -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60 -seed=1 -runs=1000000 2>&1 | FileCheck %s
More information about the llvm-commits
mailing list