[llvm] r258473 - Use std::piecewise_constant_distribution instead of ad-hoc binary search.

Ivan Krasin via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 21 17:32:34 PST 2016


Author: krasin
Date: Thu Jan 21 19:32:34 2016
New Revision: 258473

URL: http://llvm.org/viewvc/llvm-project?rev=258473&view=rev
Log:
Use std::piecewise_constant_distribution instead of ad-hoc binary search.

Summary:
Fix the issue with the most recently discovered unit receiving much less attention.

Note: I had to change the seed for one test to make it pass. Alternatively,
the number of runs could be increased. I believe that the average time of
'foo' discovery is not increased, just seed=1 was particularly convenient
for the previous PRNG scheme used.

Reviewers: aizatsky, kcc

Subscribers: llvm-commits, kcc

Differential Revision: http://reviews.llvm.org/D16419

Modified:
    llvm/trunk/lib/Fuzzer/FuzzerInterface.h
    llvm/trunk/lib/Fuzzer/FuzzerInternal.h
    llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp
    llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp

Modified: llvm/trunk/lib/Fuzzer/FuzzerInterface.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerInterface.h?rev=258473&r1=258472&r2=258473&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerInterface.h (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerInterface.h Thu Jan 21 19:32:34 2016
@@ -66,6 +66,18 @@ class FuzzerRandomBase {
   // Return a random number in range [0,n).
   size_t operator()(size_t n) { return n ? Rand() % n : 0; }
   bool RandBool() { return Rand() % 2; }
+
+  // The methods below is to satisfy UniformRandomNumberGenerator:
+  // http://en.cppreference.com/w/cpp/concept/UniformRandomNumberGenerator\
+
+  // Returns a random number between 0 and RAND_MAX inclusive.
+  double operator()() { return operator()(RAND_MAX); }
+
+  // Returns the smallest value that operator() may return.
+  double min() { return 0; }
+
+  // Returns the largest value that operator() may return.
+  double max() { return RAND_MAX; }
 };
 
 // Using libc's stand/rand.

Modified: llvm/trunk/lib/Fuzzer/FuzzerInternal.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerInternal.h?rev=258473&r1=258472&r2=258473&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerInternal.h (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerInternal.h Thu Jan 21 19:32:34 2016
@@ -17,6 +17,7 @@
 #include <chrono>
 #include <cstddef>
 #include <cstdlib>
+#include <random>
 #include <string>
 #include <string.h>
 #include <vector>
@@ -200,7 +201,7 @@ class Fuzzer {
     bool PrintNewCovPcs = false;
   };
   Fuzzer(UserSuppliedFuzzer &USF, FuzzingOptions Options);
-  void AddToCorpus(const Unit &U) { Corpus.push_back(U); }
+  void AddToCorpus(const Unit &U) { Corpus.push_back(U); UpdateCorpusDistribution(); }
   size_t ChooseUnitIdxToMutate();
   const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; };
   void Loop();
@@ -241,6 +242,9 @@ class Fuzzer {
   void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
   void PrintStats(const char *Where, const char *End = "\n");
   void PrintStatusForNewUnit(const Unit &U);
+  // Updates the probability distribution for the units in the corpus.
+  // Must be called whenever the corpus or unit weights are changed.
+  void UpdateCorpusDistribution();
 
   void SyncCorpus();
 
@@ -280,6 +284,7 @@ class Fuzzer {
     return Res;
   }
 
+  std::piecewise_constant_distribution<double> CorpusDistribution;
   UserSuppliedFuzzer &USF;
   FuzzingOptions Options;
   system_clock::time_point ProcessStartTime = system_clock::now();

Modified: llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp?rev=258473&r1=258472&r2=258473&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp Thu Jan 21 19:32:34 2016
@@ -163,6 +163,7 @@ void Fuzzer::RereadOutputCorpus() {
     if (UnitHashesAddedToCorpus.insert(Hash(X)).second) {
       if (RunOne(X)) {
         Corpus.push_back(X);
+        UpdateCorpusDistribution();
         PrintStats("RELOAD");
       }
     }
@@ -200,6 +201,7 @@ void Fuzzer::ShuffleAndMinimize() {
     }
   }
   Corpus = NewCorpus;
+  UpdateCorpusDistribution();
   for (auto &X : Corpus)
     UnitHashesAddedToCorpus.insert(Hash(X));
   PrintStats("INITED");
@@ -347,6 +349,7 @@ void Fuzzer::PrintStatusForNewUnit(const
 
 void Fuzzer::ReportNewCoverage(const Unit &U) {
   Corpus.push_back(U);
+  UpdateCorpusDistribution();
   UnitHashesAddedToCorpus.insert(Hash(U));
   USF.GetMD().RecordSuccessfulMutationSequence();
   PrintStatusForNewUnit(U);
@@ -409,22 +412,11 @@ void Fuzzer::MutateAndTestOne() {
 
 // Returns an index of random unit from the corpus to mutate.
 // Hypothesis: units added to the corpus last are more likely to be interesting.
-// This function gives more wieght to the more recent units.
+// This function gives more weight to the more recent units.
 size_t Fuzzer::ChooseUnitIdxToMutate() {
-    size_t N = Corpus.size();
-    size_t Total = (N + 1) * N / 2;
-    size_t R = USF.GetRand()(Total);
-    size_t IdxBeg = 0, IdxEnd = N;
-    // Binary search.
-    while (IdxEnd - IdxBeg >= 2) {
-      size_t Idx = IdxBeg + (IdxEnd - IdxBeg) / 2;
-      if (R > (Idx + 1) * Idx / 2)
-        IdxBeg = Idx;
-      else
-        IdxEnd = Idx;
-    }
-    assert(IdxBeg < N);
-    return IdxBeg;
+    size_t Idx = static_cast<size_t>(CorpusDistribution(USF.GetRand()));
+    assert(Idx < Corpus.size());
+    return Idx;
 }
 
 // Experimental search heuristic: drilling.
@@ -447,6 +439,7 @@ void Fuzzer::Drill() {
   std::vector<Unit> SavedCorpus;
   SavedCorpus.swap(Corpus);
   Corpus.push_back(U);
+  UpdateCorpusDistribution();
   assert(Corpus.size() == 1);
   RunOne(U);
   PrintStats("DRILL ");
@@ -510,4 +503,14 @@ void Fuzzer::SyncCorpus() {
   ExecuteCommand(Options.SyncCommand + " " + Options.OutputCorpus);
 }
 
+void Fuzzer::UpdateCorpusDistribution() {
+  size_t N = Corpus.size();
+  std::vector<double> Intervals(N+1);
+  std::vector<double> Weights(N);
+  std::iota(Intervals.begin(), Intervals.end(), 0);
+  std::iota(Weights.begin(), Weights.end(), 1);
+  CorpusDistribution = std::piecewise_constant_distribution<double>(
+      Intervals.begin(), Intervals.end(), Weights.begin());
+}
+
 }  // namespace fuzzer

Modified: llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp?rev=258473&r1=258472&r2=258473&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp (original)
+++ llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp Thu Jan 21 19:32:34 2016
@@ -6,7 +6,7 @@ using namespace fuzzer;
 
 // For now, have LLVMFuzzerTestOneInput just to make it link.
 // Later we may want to make unittests that actually call LLVMFuzzerTestOneInput.
-extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   abort();
 }
 
@@ -400,3 +400,23 @@ TEST(FuzzerUtil, Base64) {
   EXPECT_EQ("YWJjeHk=", Base64({'a', 'b', 'c', 'x', 'y'}));
   EXPECT_EQ("YWJjeHl6", Base64({'a', 'b', 'c', 'x', 'y', 'z'}));
 }
+
+TEST(Corpus, Distribution) {
+  FuzzerRandomLibc Rand(0);
+  SimpleUserSuppliedFuzzer USF(&Rand, LLVMFuzzerTestOneInput);
+  Fuzzer::FuzzingOptions Options;
+  Fuzzer Fuzz(USF, Options);
+  size_t N = 10;
+  size_t TriesPerUnit = 1<<20;
+  for (size_t i = 0; i < N; i++) {
+    Fuzz.AddToCorpus(Unit{ static_cast<uint8_t>(i) });
+  }
+  std::vector<size_t> Hist(N);
+  for (size_t i = 0; i < N * TriesPerUnit; i++) {
+    Hist[Fuzz.ChooseUnitIdxToMutate()]++;
+  }
+  for (size_t i = 0; i < N; i++) {
+    // A weak sanity check that every unit gets invoked.
+    EXPECT_GT(Hist[i], TriesPerUnit / N / 3);
+  }
+}




More information about the llvm-commits mailing list