[compiler-rt] a30dbbe - Redistribute energy for Corpus

Matt Morehouse via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 8 09:23:07 PDT 2021


Author: gtt1995
Date: 2021-09-08T09:22:45-07:00
New Revision: a30dbbe9241fa72580638eb08468d3e6a39cf2b0

URL: https://github.com/llvm/llvm-project/commit/a30dbbe9241fa72580638eb08468d3e6a39cf2b0
DIFF: https://github.com/llvm/llvm-project/commit/a30dbbe9241fa72580638eb08468d3e6a39cf2b0.diff

LOG: Redistribute energy for Corpus

I found that the initial corpus allocation of fork mode has certain defects.
I designed a new initial corpus allocation strategy based on size grouping.
This method can give more energy to the small seeds in the corpus and
increase the throughput of the test.

Fuzzbench data (glibfuzzer is -fork_corpus_groups=1):
https://www.fuzzbench.com/reports/experimental/2021-08-05-parallel/index.html

Reviewed By: morehouse

Differential Revision: https://reviews.llvm.org/D105084

Added: 
    compiler-rt/test/fuzzer/fork_corpus_groups.test

Modified: 
    compiler-rt/lib/fuzzer/FuzzerDriver.cpp
    compiler-rt/lib/fuzzer/FuzzerFlags.def
    compiler-rt/lib/fuzzer/FuzzerFork.cpp
    compiler-rt/lib/fuzzer/FuzzerOptions.h

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
index c1a7e1b4dabc9..6b007f2ad45c6 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
@@ -870,6 +870,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
     exit(0);
   }
 
+  Options.ForkCorpusGroups = Flags.fork_corpus_groups;
   if (Flags.fork)
     FuzzWithFork(F->GetMD().GetRand(), Options, Args, *Inputs, Flags.fork);
 

diff  --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def
index d0cf0e904d8b6..11815349b0149 100644
--- a/compiler-rt/lib/fuzzer/FuzzerFlags.def
+++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def
@@ -58,6 +58,10 @@ FUZZER_FLAG_INT(max_total_time, 0, "If positive, indicates the maximal total "
 FUZZER_FLAG_INT(help, 0, "Print help.")
 FUZZER_FLAG_INT(fork, 0, "Experimental mode where fuzzing happens "
                 "in a subprocess")
+FUZZER_FLAG_INT(fork_corpus_groups, 0, "For fork mode, enable the corpus-group "
+		"strategy, The main corpus will be grouped according to size, "
+		"and each sub-process will randomly select seeds from 
diff erent "
+		"groups as the sub-corpus.")
 FUZZER_FLAG_INT(ignore_timeouts, 1, "Ignore timeouts in fork mode")
 FUZZER_FLAG_INT(ignore_ooms, 1, "Ignore OOMs in fork mode")
 FUZZER_FLAG_INT(ignore_crashes, 0, "Ignore crashes in fork mode")

diff  --git a/compiler-rt/lib/fuzzer/FuzzerFork.cpp b/compiler-rt/lib/fuzzer/FuzzerFork.cpp
index 8771ab3613190..eb68b72f47483 100644
--- a/compiler-rt/lib/fuzzer/FuzzerFork.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerFork.cpp
@@ -95,9 +95,12 @@ struct GlobalEnv {
   std::set<uint32_t> Features, Cov;
   std::set<std::string> FilesWithDFT;
   std::vector<std::string> Files;
+  std::vector<std::size_t> FilesSizes;
   Random *Rand;
   std::chrono::system_clock::time_point ProcessStartTime;
   int Verbosity = 0;
+  int Group = 0;
+  int NumCorpuses = 8;
 
   size_t NumTimeouts = 0;
   size_t NumOOMs = 0;
@@ -136,10 +139,24 @@ struct GlobalEnv {
     if (size_t CorpusSubsetSize =
             std::min(Files.size(), (size_t)sqrt(Files.size() + 2))) {
       auto Time1 = std::chrono::system_clock::now();
-      for (size_t i = 0; i < CorpusSubsetSize; i++) {
-        auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
-        Seeds += (Seeds.empty() ? "" : ",") + SF;
-        CollectDFT(SF);
+      if (Group) { // whether to group the corpus.
+        size_t AverageCorpusSize = Files.size() / NumCorpuses + 1;
+        size_t StartIndex = ((JobId - 1) % NumCorpuses) * AverageCorpusSize;
+        for (size_t i = 0; i < CorpusSubsetSize; i++) {
+          size_t RandNum = (*Rand)(AverageCorpusSize);
+          size_t Index = RandNum + StartIndex;
+          Index = Index < Files.size() ? Index
+                                       : Rand->SkewTowardsLast(Files.size());
+          auto &SF = Files[Index];
+          Seeds += (Seeds.empty() ? "" : ",") + SF;
+          CollectDFT(SF);
+        }
+      } else {
+        for (size_t i = 0; i < CorpusSubsetSize; i++) {
+          auto &SF = Files[Rand->SkewTowardsLast(Files.size())];
+          Seeds += (Seeds.empty() ? "" : ",") + SF;
+          CollectDFT(SF);
+        }
       }
       auto Time2 = std::chrono::system_clock::now();
       auto DftTimeInSeconds = duration_cast<seconds>(Time2 - Time1).count();
@@ -222,7 +239,16 @@ struct GlobalEnv {
       auto U = FileToVector(Path);
       auto NewPath = DirPlusFile(MainCorpusDir, Hash(U));
       WriteToFile(U, NewPath);
-      Files.push_back(NewPath);
+      if (Group) { // Insert the queue according to the size of the seed.
+        size_t UnitSize = U.size();
+        auto Idx =
+            std::upper_bound(FilesSizes.begin(), FilesSizes.end(), UnitSize) -
+            FilesSizes.begin();
+        FilesSizes.insert(FilesSizes.begin() + Idx, UnitSize);
+        Files.insert(Files.begin() + Idx, NewPath);
+      } else {
+        Files.push_back(NewPath);
+      }
     }
     Features.insert(NewFeatures.begin(), NewFeatures.end());
     Cov.insert(NewCov.begin(), NewCov.end());
@@ -231,10 +257,8 @@ struct GlobalEnv {
         if (TPC.PcIsFuncEntry(TE))
           PrintPC("  NEW_FUNC: %p %F %L\n", "",
                   TPC.GetNextInstructionPc(TE->PC));
-
   }
 
-
   void CollectDFT(const std::string &InputPath) {
     if (DataFlowBinary.empty()) return;
     if (!FilesWithDFT.insert(InputPath).second) return;
@@ -297,6 +321,7 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
   Env.Verbosity = Options.Verbosity;
   Env.ProcessStartTime = std::chrono::system_clock::now();
   Env.DataFlowBinary = Options.CollectDataFlow;
+  Env.Group = Options.ForkCorpusGroups;
 
   std::vector<SizedFile> SeedFiles;
   for (auto &Dir : CorpusDirs)
@@ -327,6 +352,12 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
     Env.Cov.insert(NewFeatures.begin(), NewFeatures.end());
     RemoveFile(CFPath);
   }
+
+  if (Env.Group) {
+    for (auto &path : Env.Files)
+      Env.FilesSizes.push_back(FileSize(path));
+  }
+
   Printf("INFO: -fork=%d: %zd seed inputs, starting to fuzz in %s\n", NumJobs,
          Env.Files.size(), Env.TempDir.c_str());
 
@@ -341,6 +372,8 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
     WriteToFile(Unit({1}), Env.StopFile());
   };
 
+  size_t MergeCycle = 20;
+  size_t JobExecuted = 0;
   size_t JobId = 1;
   std::vector<std::thread> Threads;
   for (int t = 0; t < NumJobs; t++) {
@@ -362,6 +395,45 @@ void FuzzWithFork(Random &Rand, const FuzzingOptions &Options,
 
     Env.RunOneMergeJob(Job.get());
 
+    // merge the corpus .
+    JobExecuted++;
+    if (Env.Group && JobExecuted >= MergeCycle) {
+      std::vector<SizedFile> CurrentSeedFiles;
+      for (auto &Dir : CorpusDirs)
+        GetSizedFilesFromDir(Dir, &CurrentSeedFiles);
+      std::sort(CurrentSeedFiles.begin(), CurrentSeedFiles.end());
+
+      auto CFPath = DirPlusFile(Env.TempDir, "merge.txt");
+      std::set<uint32_t> TmpNewFeatures, TmpNewCov;
+      std::set<uint32_t> TmpFeatures, TmpCov;
+      Env.Files.clear();
+      Env.FilesSizes.clear();
+      CrashResistantMerge(Env.Args, {}, CurrentSeedFiles, &Env.Files,
+                          TmpFeatures, &TmpNewFeatures, TmpCov, &TmpNewCov,
+                          CFPath, false);
+      for (auto &path : Env.Files)
+        Env.FilesSizes.push_back(FileSize(path));
+      RemoveFile(CFPath);
+      JobExecuted = 0;
+      MergeCycle += 5;
+    }
+
+    // Since the number of corpus seeds will gradually increase, in order to
+    // control the number in each group to be about three times the number of
+    // seeds selected each time, the number of groups is dynamically adjusted.
+    if (Env.Files.size() < 2000)
+      Env.NumCorpuses = 12;
+    else if (Env.Files.size() < 6000)
+      Env.NumCorpuses = 20;
+    else if (Env.Files.size() < 12000)
+      Env.NumCorpuses = 32;
+    else if (Env.Files.size() < 16000)
+      Env.NumCorpuses = 40;
+    else if (Env.Files.size() < 24000)
+      Env.NumCorpuses = 60;
+    else
+      Env.NumCorpuses = 80;
+
     // Continue if our crash is one of the ignored ones.
     if (Options.IgnoreTimeouts && ExitCode == Options.TimeoutExitCode)
       Env.NumTimeouts++;

diff  --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h
index d0c285a6821d9..72e2561061945 100644
--- a/compiler-rt/lib/fuzzer/FuzzerOptions.h
+++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h
@@ -47,6 +47,7 @@ struct FuzzingOptions {
   int ReportSlowUnits = 10;
   bool OnlyASCII = false;
   bool Entropic = true;
+  bool ForkCorpusGroups = false;
   size_t EntropicFeatureFrequencyThreshold = 0xFF;
   size_t EntropicNumberOfRarestFeatures = 100;
   bool EntropicScalePerExecTime = false;

diff  --git a/compiler-rt/test/fuzzer/fork_corpus_groups.test b/compiler-rt/test/fuzzer/fork_corpus_groups.test
new file mode 100644
index 0000000000000..1ccc8e4490b09
--- /dev/null
+++ b/compiler-rt/test/fuzzer/fork_corpus_groups.test
@@ -0,0 +1,21 @@
+# UNSUPPORTED: darwin, freebsd, aarch64
+BINGO: BINGO
+RUN: %cpp_compiler %S/SimpleTest.cpp -o %t-SimpleTest
+RUN: not %run %t-SimpleTest -fork=1 -fork_corpus_groups=1 2>&1 | FileCheck %s --check-prefix=BINGO
+
+TIMEOUT: ERROR: libFuzzer: timeout
+RUN: %cpp_compiler %S/TimeoutTest.cpp -o %t-TimeoutTest
+RUN: not %run %t-TimeoutTest -fork=1 -fork_corpus_groups=1 -timeout=1 -ignore_timeouts=0 2>&1 | FileCheck %s --check-prefix=TIMEOUT
+
+OOM: ERROR: libFuzzer: out-of-memory
+RUN: %cpp_compiler %S/OutOfMemoryTest.cpp -o %t-OutOfMemoryTest
+RUN: not %run %t-OutOfMemoryTest -fork=1 -fork_corpus_groups=1 -ignore_ooms=0  -rss_limit_mb=128 2>&1 | FileCheck %s --check-prefix=OOM
+
+# access-violation is the error thrown on Windows. Address will be smaller on i386.
+CRASH: {{SEGV|access-violation}} on unknown address 0x00000000
+RUN: %cpp_compiler %S/ShallowOOMDeepCrash.cpp -o %t-ShallowOOMDeepCrash
+RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -fork_corpus_groups=1 -rss_limit_mb=128 2>&1 | FileCheck %s --check-prefix=CRASH
+
+MAX_TOTAL_TIME: INFO: fuzzed for {{.*}} seconds, wrapping up soon
+MAX_TOTAL_TIME: INFO: exiting: {{.*}} time:
+RUN: not %run %t-ShallowOOMDeepCrash -fork=1 -fork_corpus_groups=1 -rss_limit_mb=128 -ignore_crashes=1 -max_total_time=10 2>&1 | FileCheck %s  --check-prefix=MAX_TOTAL_TIME


        


More information about the llvm-commits mailing list