[llvm] r258571 - Use std::piecewise_constant_distribution instead of ad-hoc binary search.

Ivan Krasin via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 22 14:28:27 PST 2016


Author: krasin
Date: Fri Jan 22 16:28:27 2016
New Revision: 258571

URL: http://llvm.org/viewvc/llvm-project?rev=258571&view=rev
Log:
Use std::piecewise_constant_distribution instead of ad-hoc binary search.

Summary:
Fix the issue with the most recently discovered unit receiving much less attention.

Note: this is the second attempt (prev: r258473). Now, libc++ build is fixed.

Reviewers: aizatsky, kcc

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D16487

Modified:
    llvm/trunk/lib/Fuzzer/FuzzerInternal.h
    llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp
    llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp

Modified: llvm/trunk/lib/Fuzzer/FuzzerInternal.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerInternal.h?rev=258571&r1=258570&r2=258571&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerInternal.h (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerInternal.h Fri Jan 22 16:28:27 2016
@@ -13,14 +13,15 @@
 #define LLVM_FUZZER_INTERNAL_H
 
 #include <cassert>
-#include <climits>
 #include <chrono>
+#include <climits>
 #include <cstddef>
 #include <cstdlib>
-#include <string>
+#include <random>
 #include <string.h>
-#include <vector>
+#include <string>
 #include <unordered_set>
+#include <vector>
 
 #include "FuzzerInterface.h"
 
@@ -29,10 +30,8 @@ using namespace std::chrono;
 typedef std::vector<uint8_t> Unit;
 
 // A simple POD sized array of bytes.
-template<size_t kMaxSize>
-class FixedWord {
- public:
-
+template <size_t kMaxSize> class FixedWord {
+public:
   FixedWord() : Size(0) {}
   FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); }
 
@@ -42,12 +41,13 @@ class FixedWord {
     Size = S;
   }
 
-  bool operator == (const FixedWord<kMaxSize> &w) const {
+  bool operator==(const FixedWord<kMaxSize> &w) const {
     return Size == w.Size && 0 == memcmp(Data, w.Data, Size);
   }
 
-  bool operator < (const FixedWord<kMaxSize> &w) const {
-    if (Size != w.Size) return Size < w.Size;
+  bool operator<(const FixedWord<kMaxSize> &w) const {
+    if (Size != w.Size)
+      return Size < w.Size;
     return memcmp(Data, w.Data, Size) < 0;
   }
 
@@ -55,12 +55,12 @@ class FixedWord {
   const uint8_t *data() const { return Data; }
   uint8_t size() const { return Size; }
 
- private:
+private:
   uint8_t Size;
   uint8_t Data[kMaxSize];
 };
 
-typedef FixedWord<27> Word;  // 28 bytes.
+typedef FixedWord<27> Word; // 28 bytes.
 
 std::string FileToString(const std::string &Path);
 Unit FileToVector(const std::string &Path);
@@ -108,7 +108,7 @@ bool ParseOneDictionaryEntry(const std::
 bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units);
 
 class MutationDispatcher {
- public:
+public:
   MutationDispatcher(FuzzerRandomBase &Rand);
   ~MutationDispatcher();
   /// Indicate that we are about to start a new sequence of mutations.
@@ -162,27 +162,27 @@ class MutationDispatcher {
 
   void SetCorpus(const std::vector<Unit> *Corpus);
 
- private:
+private:
   FuzzerRandomBase &Rand;
   struct Impl;
   Impl *MDImpl;
 };
 
 class Fuzzer {
- public:
+public:
   struct FuzzingOptions {
     int Verbosity = 1;
     int MaxLen = 0;
     int UnitTimeoutSec = 300;
     int MaxTotalTimeSec = 0;
     bool DoCrossOver = true;
-    int  MutateDepth = 5;
+    int MutateDepth = 5;
     bool ExitOnFirst = false;
     bool UseCounters = false;
     bool UseIndirCalls = true;
     bool UseTraces = false;
     bool UseMemcmp = true;
-    bool UseFullCoverageSet  = false;
+    bool UseFullCoverageSet = false;
     bool Reload = true;
     bool ShuffleAtStartUp = true;
     int PreferSmallDuringInitialShuffle = -1;
@@ -195,12 +195,15 @@ class Fuzzer {
     std::string ArtifactPrefix = "./";
     std::string ExactArtifactPath;
     bool SaveArtifacts = true;
-    bool PrintNEW = true;  // Print a status line when new units are found;
+    bool PrintNEW = true; // Print a status line when new units are found;
     bool OutputCSV = false;
     bool PrintNewCovPcs = false;
   };
   Fuzzer(UserSuppliedFuzzer &USF, FuzzingOptions Options);
-  void AddToCorpus(const Unit &U) { Corpus.push_back(U); }
+  void AddToCorpus(const Unit &U) {
+    Corpus.push_back(U);
+    UpdateCorpusDistribution();
+  }
   size_t ChooseUnitIdxToMutate();
   const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; };
   void Loop();
@@ -231,7 +234,7 @@ class Fuzzer {
   // Merge Corpora[1:] into Corpora[0].
   void Merge(const std::vector<std::string> &Corpora);
 
- private:
+private:
   void AlarmCallback();
   void MutateAndTestOne();
   void ReportNewCoverage(const Unit &U);
@@ -241,6 +244,9 @@ class Fuzzer {
   void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
   void PrintStats(const char *Where, const char *End = "\n");
   void PrintStatusForNewUnit(const Unit &U);
+  // Updates the probability distribution for the units in the corpus.
+  // Must be called whenever the corpus or unit weights are changed.
+  void UpdateCorpusDistribution();
 
   void SyncCorpus();
 
@@ -249,7 +255,6 @@ class Fuzzer {
   void PrepareCoverageBeforeRun();
   bool CheckCoverageAfterRun();
 
-
   // Trace-based fuzzing: we run a unit with some kind of tracing
   // enabled and record potentially useful mutations. Then
   // We apply these mutations one by one to the unit and run it again.
@@ -274,12 +279,20 @@ class Fuzzer {
 
   // For UseCounters
   std::vector<uint8_t> CounterBitmap;
-  size_t TotalBits() {  // Slow. Call it only for printing stats.
+  size_t TotalBits() { // Slow. Call it only for printing stats.
     size_t Res = 0;
-    for (auto x : CounterBitmap) Res += __builtin_popcount(x);
+    for (auto x : CounterBitmap)
+      Res += __builtin_popcount(x);
     return Res;
   }
 
+  // TODO(krasin): remove GetRand from UserSuppliedFuzzer,
+  // and fully rely on the generator and the seed.
+  // The user supplied fuzzer will have a way to access the
+  // generator for its own purposes (like seeding the custom
+  // PRNG).
+  std::mt19937 Generator;
+  std::piecewise_constant_distribution<double> CorpusDistribution;
   UserSuppliedFuzzer &USF;
   FuzzingOptions Options;
   system_clock::time_point ProcessStartTime = system_clock::now();
@@ -292,8 +305,8 @@ class Fuzzer {
   size_t LastCoveragePcBufferLen = 0;
 };
 
-class SimpleUserSuppliedFuzzer: public UserSuppliedFuzzer {
- public:
+class SimpleUserSuppliedFuzzer : public UserSuppliedFuzzer {
+public:
   SimpleUserSuppliedFuzzer(FuzzerRandomBase *Rand, UserCallback Callback)
       : UserSuppliedFuzzer(Rand), Callback(Callback) {}
 
@@ -301,10 +314,10 @@ class SimpleUserSuppliedFuzzer: public U
     return Callback(Data, Size);
   }
 
- private:
+private:
   UserCallback Callback = nullptr;
 };
 
-};  // namespace fuzzer
+}; // namespace fuzzer
 
 #endif // LLVM_FUZZER_INTERNAL_H

Modified: llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp?rev=258571&r1=258570&r2=258571&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp Fri Jan 22 16:28:27 2016
@@ -15,9 +15,9 @@
 #include <memory>
 
 #if defined(__has_include)
-# if __has_include(<sanitizer/coverage_interface.h>)
-#  include <sanitizer/coverage_interface.h>
-# endif
+#if __has_include(<sanitizer / coverage_interface.h>)
+#include <sanitizer/coverage_interface.h>
+#endif
 #endif
 
 extern "C" {
@@ -28,11 +28,11 @@ __attribute__((weak)) void __sanitizer_p
 __attribute__((weak)) void __sanitizer_reset_coverage();
 __attribute__((weak)) size_t __sanitizer_get_total_unique_caller_callee_pairs();
 __attribute__((weak)) size_t __sanitizer_get_total_unique_coverage();
-__attribute__((weak))
-void __sanitizer_set_death_callback(void (*callback)(void));
+__attribute__((weak)) void
+__sanitizer_set_death_callback(void (*callback)(void));
 __attribute__((weak)) size_t __sanitizer_get_number_of_counters();
-__attribute__((weak))
-uintptr_t __sanitizer_update_counter_bitset_and_clear_counters(uint8_t *bitset);
+__attribute__((weak)) uintptr_t
+__sanitizer_update_counter_bitset_and_clear_counters(uint8_t *bitset);
 __attribute__((weak)) uintptr_t
 __sanitizer_get_coverage_pc_buffer(uintptr_t **data);
 }
@@ -42,7 +42,8 @@ static const size_t kMaxUnitSizeToPrint
 
 static void MissingWeakApiFunction(const char *FnName) {
   Printf("ERROR: %s is not defined. Exiting.\n"
-         "Did you use -fsanitize-coverage=... to build your code?\n", FnName);
+         "Did you use -fsanitize-coverage=... to build your code?\n",
+         FnName);
   exit(1);
 }
 
@@ -56,7 +57,7 @@ static void MissingWeakApiFunction(const
 static Fuzzer *F;
 
 Fuzzer::Fuzzer(UserSuppliedFuzzer &USF, FuzzingOptions Options)
-    : USF(USF), Options(Options) {
+    : Generator(USF.GetRand().Rand()), USF(USF), Options(Options) {
   SetDeathCallback();
   InitializeTraceState();
   assert(!F);
@@ -92,7 +93,8 @@ void Fuzzer::AlarmCallback() {
   assert(Options.UnitTimeoutSec > 0);
   size_t Seconds =
       duration_cast<seconds>(system_clock::now() - UnitStartTime).count();
-  if (Seconds == 0) return;
+  if (Seconds == 0)
+    return;
   if (Options.Verbosity >= 2)
     Printf("AlarmCallback %zd\n", Seconds);
   if (Seconds >= (size_t)Options.UnitTimeoutSec) {
@@ -146,7 +148,8 @@ void Fuzzer::PrintStats(const char *Wher
 }
 
 void Fuzzer::RereadOutputCorpus() {
-  if (Options.OutputCorpus.empty()) return;
+  if (Options.OutputCorpus.empty())
+    return;
   std::vector<Unit> AdditionalCorpus;
   ReadDirToVectorOfUnits(Options.OutputCorpus.c_str(), &AdditionalCorpus,
                          &EpochOfLastReadOfOutputCorpus);
@@ -154,15 +157,17 @@ void Fuzzer::RereadOutputCorpus() {
     Corpus = AdditionalCorpus;
     return;
   }
-  if (!Options.Reload) return;
+  if (!Options.Reload)
+    return;
   if (Options.Verbosity >= 2)
-    Printf("Reload: read %zd new units.\n",  AdditionalCorpus.size());
+    Printf("Reload: read %zd new units.\n", AdditionalCorpus.size());
   for (auto &X : AdditionalCorpus) {
     if (X.size() > (size_t)Options.MaxLen)
       X.resize(Options.MaxLen);
     if (UnitHashesAddedToCorpus.insert(Hash(X)).second) {
       if (RunOne(X)) {
         Corpus.push_back(X);
+        UpdateCorpusDistribution();
         PrintStats("RELOAD");
       }
     }
@@ -200,6 +205,7 @@ void Fuzzer::ShuffleAndMinimize() {
     }
   }
   Corpus = NewCorpus;
+  UpdateCorpusDistribution();
   for (auto &X : Corpus)
     UnitHashesAddedToCorpus.insert(Hash(X));
   PrintStats("INITED");
@@ -304,7 +310,8 @@ bool Fuzzer::CheckCoverageAfterRun() {
 }
 
 void Fuzzer::WriteToOutputCorpus(const Unit &U) {
-  if (Options.OutputCorpus.empty()) return;
+  if (Options.OutputCorpus.empty())
+    return;
   std::string Path = DirPlusFile(Options.OutputCorpus, Hash(U));
   WriteToFile(U, Path);
   if (Options.Verbosity >= 2)
@@ -317,7 +324,7 @@ void Fuzzer::WriteUnitToFileWithPrefix(c
     return;
   std::string Path = Options.ArtifactPrefix + Prefix + Hash(U);
   if (!Options.ExactArtifactPath.empty())
-    Path = Options.ExactArtifactPath;  // Overrides ArtifactPrefix.
+    Path = Options.ExactArtifactPath; // Overrides ArtifactPrefix.
   WriteToFile(U, Path);
   Printf("artifact_prefix='%s'; Test unit written to %s\n",
          Options.ArtifactPrefix.c_str(), Path.c_str());
@@ -326,7 +333,8 @@ void Fuzzer::WriteUnitToFileWithPrefix(c
 }
 
 void Fuzzer::SaveCorpus() {
-  if (Options.OutputCorpus.empty()) return;
+  if (Options.OutputCorpus.empty())
+    return;
   for (const auto &U : Corpus)
     WriteToFile(U, DirPlusFile(Options.OutputCorpus, Hash(U)));
   if (Options.Verbosity)
@@ -347,6 +355,7 @@ void Fuzzer::PrintStatusForNewUnit(const
 
 void Fuzzer::ReportNewCoverage(const Unit &U) {
   Corpus.push_back(U);
+  UpdateCorpusDistribution();
   UnitHashesAddedToCorpus.insert(Hash(U));
   USF.GetMD().RecordSuccessfulMutationSequence();
   PrintStatusForNewUnit(U);
@@ -409,22 +418,11 @@ void Fuzzer::MutateAndTestOne() {
 
 // Returns an index of random unit from the corpus to mutate.
 // Hypothesis: units added to the corpus last are more likely to be interesting.
-// This function gives more wieght to the more recent units.
+// This function gives more weight to the more recent units.
 size_t Fuzzer::ChooseUnitIdxToMutate() {
-    size_t N = Corpus.size();
-    size_t Total = (N + 1) * N / 2;
-    size_t R = USF.GetRand()(Total);
-    size_t IdxBeg = 0, IdxEnd = N;
-    // Binary search.
-    while (IdxEnd - IdxBeg >= 2) {
-      size_t Idx = IdxBeg + (IdxEnd - IdxBeg) / 2;
-      if (R > (Idx + 1) * Idx / 2)
-        IdxBeg = Idx;
-      else
-        IdxEnd = Idx;
-    }
-    assert(IdxBeg < N);
-    return IdxBeg;
+  size_t Idx = static_cast<size_t>(CorpusDistribution(Generator));
+  assert(Idx < Corpus.size());
+  return Idx;
 }
 
 // Experimental search heuristic: drilling.
@@ -437,7 +435,7 @@ size_t Fuzzer::ChooseUnitIdxToMutate() {
 void Fuzzer::Drill() {
   // The corpus is already read, shuffled, and minimized.
   assert(!Corpus.empty());
-  Options.PrintNEW = false;  // Don't print NEW status lines when drilling.
+  Options.PrintNEW = false; // Don't print NEW status lines when drilling.
 
   Unit U = ChooseUnitToMutate();
 
@@ -447,6 +445,7 @@ void Fuzzer::Drill() {
   std::vector<Unit> SavedCorpus;
   SavedCorpus.swap(Corpus);
   Corpus.push_back(U);
+  UpdateCorpusDistribution();
   assert(Corpus.size() == 1);
   RunOne(U);
   PrintStats("DRILL ");
@@ -490,7 +489,7 @@ void Fuzzer::Loop() {
       break;
     if (Options.MaxTotalTimeSec > 0 &&
         secondsSinceProcessStartUp() >
-        static_cast<size_t>(Options.MaxTotalTimeSec))
+            static_cast<size_t>(Options.MaxTotalTimeSec))
       break;
     // Perform several mutations and runs.
     MutateAndTestOne();
@@ -501,7 +500,8 @@ void Fuzzer::Loop() {
 }
 
 void Fuzzer::SyncCorpus() {
-  if (Options.SyncCommand.empty() || Options.OutputCorpus.empty()) return;
+  if (Options.SyncCommand.empty() || Options.OutputCorpus.empty())
+    return;
   auto Now = system_clock::now();
   if (duration_cast<seconds>(Now - LastExternalSync).count() <
       Options.SyncTimeout)
@@ -510,4 +510,14 @@ void Fuzzer::SyncCorpus() {
   ExecuteCommand(Options.SyncCommand + " " + Options.OutputCorpus);
 }
 
-}  // namespace fuzzer
+void Fuzzer::UpdateCorpusDistribution() {
+  size_t N = Corpus.size();
+  std::vector<double> Intervals(N + 1);
+  std::vector<double> Weights(N);
+  std::iota(Intervals.begin(), Intervals.end(), 0);
+  std::iota(Weights.begin(), Weights.end(), 1);
+  CorpusDistribution = std::piecewise_constant_distribution<double>(
+      Intervals.begin(), Intervals.end(), Weights.begin());
+}
+
+} // namespace fuzzer

Modified: llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp?rev=258571&r1=258570&r2=258571&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp (original)
+++ llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp Fri Jan 22 16:28:27 2016
@@ -6,7 +6,7 @@ using namespace fuzzer;
 
 // For now, have LLVMFuzzerTestOneInput just to make it link.
 // Later we may want to make unittests that actually call LLVMFuzzerTestOneInput.
-extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
   abort();
 }
 
@@ -400,3 +400,23 @@ TEST(FuzzerUtil, Base64) {
   EXPECT_EQ("YWJjeHk=", Base64({'a', 'b', 'c', 'x', 'y'}));
   EXPECT_EQ("YWJjeHl6", Base64({'a', 'b', 'c', 'x', 'y', 'z'}));
 }
+
+TEST(Corpus, Distribution) {
+  FuzzerRandomLibc Rand(0);
+  SimpleUserSuppliedFuzzer USF(&Rand, LLVMFuzzerTestOneInput);
+  Fuzzer::FuzzingOptions Options;
+  Fuzzer Fuzz(USF, Options);
+  size_t N = 10;
+  size_t TriesPerUnit = 1<<20;
+  for (size_t i = 0; i < N; i++) {
+    Fuzz.AddToCorpus(Unit{ static_cast<uint8_t>(i) });
+  }
+  std::vector<size_t> Hist(N);
+  for (size_t i = 0; i < N * TriesPerUnit; i++) {
+    Hist[Fuzz.ChooseUnitIdxToMutate()]++;
+  }
+  for (size_t i = 0; i < N; i++) {
+    // A weak sanity check that every unit gets invoked.
+    EXPECT_GT(Hist[i], TriesPerUnit / N / 3);
+  }
+}




More information about the llvm-commits mailing list