[compiler-rt] 5cda4dc - [libFuzzer] Scale energy assigned to each input based on input execution time.

Dokyung Song via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 3 13:47:16 PDT 2020


Author: Dokyung Song
Date: 2020-09-03T20:38:20Z
New Revision: 5cda4dc7b4d28fcd11307d4234c513ff779a1c6f

URL: https://github.com/llvm/llvm-project/commit/5cda4dc7b4d28fcd11307d4234c513ff779a1c6f
DIFF: https://github.com/llvm/llvm-project/commit/5cda4dc7b4d28fcd11307d4234c513ff779a1c6f.diff

LOG: [libFuzzer] Scale energy assigned to each input based on input execution time.

This patch scales the energy computed by the Entropic schedule based on the
execution time of each input. The input execution time is compared with the
average execution time of inputs in the corpus, and, based on the ratio between
the two, the energy is scaled from 0.1x (for slow-executing inputs) to 3x (for
fast-executing inputs). Note that the exact scaling criteria and formula are
borrowed from AFL.

On FuzzBench, this gives a sizeable throughput increase, which in turn leads to
more coverage on several benchmarks. For details, see the following report.

https://storage.googleapis.com/fuzzer-test-suite-public/exectime-report/index.html

Differential Revision: https://reviews.llvm.org/D86092

Added: 
    compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp
    compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test

Modified: 
    compiler-rt/lib/fuzzer/FuzzerCorpus.h
    compiler-rt/lib/fuzzer/FuzzerDriver.cpp
    compiler-rt/lib/fuzzer/FuzzerFlags.def
    compiler-rt/lib/fuzzer/FuzzerLoop.cpp
    compiler-rt/lib/fuzzer/FuzzerOptions.h
    compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/fuzzer/FuzzerCorpus.h b/compiler-rt/lib/fuzzer/FuzzerCorpus.h
index 6b45d37a4b14..daea4f5213b1 100644
--- a/compiler-rt/lib/fuzzer/FuzzerCorpus.h
+++ b/compiler-rt/lib/fuzzer/FuzzerCorpus.h
@@ -18,6 +18,7 @@
 #include "FuzzerSHA1.h"
 #include "FuzzerTracePC.h"
 #include <algorithm>
+#include <chrono>
 #include <numeric>
 #include <random>
 #include <unordered_set>
@@ -26,6 +27,7 @@ namespace fuzzer {
 
 struct InputInfo {
   Unit U;  // The actual input data.
+  std::chrono::microseconds TimeOfUnit;
   uint8_t Sha1[kSHA1NumBytes];  // Checksum.
   // Number of features that this input has and no smaller input has.
   size_t NumFeatures = 0;
@@ -62,11 +64,15 @@ struct InputInfo {
   }
 
   // Assign more energy to a high-entropy seed, i.e., that reveals more
-  // information about the globally rare features in the neighborhood
-  // of the seed. Since we do not know the entropy of a seed that has
-  // never been executed we assign fresh seeds maximum entropy and
-  // let II->Energy approach the true entropy from above.
-  void UpdateEnergy(size_t GlobalNumberOfFeatures) {
+  // information about the globally rare features in the neighborhood of the
+  // seed. Since we do not know the entropy of a seed that has never been
+  // executed we assign fresh seeds maximum entropy and let II->Energy approach
+  // the true entropy from above. If ScalePerExecTime is true, the computed
+  // entropy is scaled based on how fast this input executes compared to the
+  // average execution time of inputs. The faster an input executes, the more
+  // energy gets assigned to the input.
+  void UpdateEnergy(size_t GlobalNumberOfFeatures, bool ScalePerExecTime,
+                    std::chrono::microseconds AverageUnitExecutionTime) {
     Energy = 0.0;
     SumIncidence = 0;
 
@@ -89,6 +95,27 @@ struct InputInfo {
     // Normalize.
     if (SumIncidence != 0)
       Energy = (Energy / SumIncidence) + logl(SumIncidence);
+
+    if (ScalePerExecTime) {
+      // Scaling to favor inputs with lower execution time.
+      uint32_t PerfScore = 100;
+      if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 10)
+        PerfScore = 10;
+      else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 4)
+        PerfScore = 25;
+      else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 2)
+        PerfScore = 50;
+      else if (TimeOfUnit.count() * 3 > AverageUnitExecutionTime.count() * 4)
+        PerfScore = 75;
+      else if (TimeOfUnit.count() * 4 < AverageUnitExecutionTime.count())
+        PerfScore = 300;
+      else if (TimeOfUnit.count() * 3 < AverageUnitExecutionTime.count())
+        PerfScore = 200;
+      else if (TimeOfUnit.count() * 2 < AverageUnitExecutionTime.count())
+        PerfScore = 150;
+
+      Energy *= PerfScore;
+    }
   }
 
   // Increment the frequency of the feature Idx.
@@ -121,6 +148,7 @@ struct EntropicOptions {
   bool Enabled;
   size_t NumberOfRarestFeatures;
   size_t FeatureFrequencyThreshold;
+  bool ScalePerExecTime;
 };
 
 class InputCorpus {
@@ -179,6 +207,7 @@ class InputCorpus {
   const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; }
   InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile,
                          bool HasFocusFunction, bool NeverReduce,
+                         std::chrono::microseconds TimeOfUnit,
                          const Vector<uint32_t> &FeatureSet,
                          const DataFlowTrace &DFT, const InputInfo *BaseII) {
     assert(!U.empty());
@@ -189,6 +218,7 @@ class InputCorpus {
     II.U = U;
     II.NumFeatures = NumFeatures;
     II.NeverReduce = NeverReduce;
+    II.TimeOfUnit = TimeOfUnit;
     II.MayDeleteFile = MayDeleteFile;
     II.UniqFeatureSet = FeatureSet;
     II.HasFocusFunction = HasFocusFunction;
@@ -471,12 +501,19 @@ class InputCorpus {
     Weights.resize(N);
     std::iota(Intervals.begin(), Intervals.end(), 0);
 
+    std::chrono::microseconds AverageUnitExecutionTime(0);
+    for (auto II : Inputs) {
+      AverageUnitExecutionTime += II->TimeOfUnit;
+    }
+    AverageUnitExecutionTime /= N;
+
     bool VanillaSchedule = true;
     if (Entropic.Enabled) {
       for (auto II : Inputs) {
         if (II->NeedsEnergyUpdate && II->Energy != 0.0) {
           II->NeedsEnergyUpdate = false;
-          II->UpdateEnergy(RareFeatures.size());
+          II->UpdateEnergy(RareFeatures.size(), Entropic.ScalePerExecTime,
+                           AverageUnitExecutionTime);
         }
       }
 

diff  --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
index 93b1ff658e42..caafd1dbb0a7 100644
--- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp
@@ -764,6 +764,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
       (size_t)Flags.entropic_feature_frequency_threshold;
   Options.EntropicNumberOfRarestFeatures =
       (size_t)Flags.entropic_number_of_rarest_features;
+  Options.EntropicScalePerExecTime = Flags.entropic_scale_per_exec_time;
   if (Options.Entropic) {
     if (!Options.FocusFunction.empty()) {
       Printf("ERROR: The parameters `--entropic` and `--focus_function` cannot "
@@ -779,6 +780,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) {
   Entropic.FeatureFrequencyThreshold =
       Options.EntropicFeatureFrequencyThreshold;
   Entropic.NumberOfRarestFeatures = Options.EntropicNumberOfRarestFeatures;
+  Entropic.ScalePerExecTime = Options.EntropicScalePerExecTime;
 
   unsigned Seed = Flags.seed;
   // Initialize Seed.

diff  --git a/compiler-rt/lib/fuzzer/FuzzerFlags.def b/compiler-rt/lib/fuzzer/FuzzerFlags.def
index 5194666634b4..fdb8362cef9d 100644
--- a/compiler-rt/lib/fuzzer/FuzzerFlags.def
+++ b/compiler-rt/lib/fuzzer/FuzzerFlags.def
@@ -175,6 +175,11 @@ FUZZER_FLAG_INT(entropic_number_of_rarest_features, 100, "Experimental. If "
      "entropic is enabled, we keep track of the frequencies only for the "
      "Top-X least abundant features (union features that are considered as "
      "rare).")
+FUZZER_FLAG_INT(entropic_scale_per_exec_time, 0, "Experimental. If 1, "
+     "the Entropic power schedule gets scaled based on the input execution "
+     "time. Inputs with lower execution time get scheduled more (up to 30x). "
+     "Note that, if 1, fuzzer stops from being deterministic even if a "
+     "non-zero random seed is given.")
 
 FUZZER_FLAG_INT(analyze_dict, 0, "Experimental")
 FUZZER_DEPRECATED_FLAG(use_clang_coverage)

diff  --git a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
index f97b4d2f7f01..f9986dd8eea5 100644
--- a/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerLoop.cpp
@@ -470,6 +470,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
     return false;
 
   ExecuteCallback(Data, Size);
+  auto TimeOfUnit = duration_cast<microseconds>(UnitStopTime - UnitStartTime);
 
   UniqFeatureSetTmp.clear();
   size_t FoundUniqFeaturesOfII = 0;
@@ -493,7 +494,7 @@ bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile,
     auto NewII =
         Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile,
                            TPC.ObservedFocusFunction(), ForceAddToCorpus,
-                           UniqFeatureSetTmp, DFT, II);
+                           TimeOfUnit, UniqFeatureSetTmp, DFT, II);
     WriteFeatureSetToFile(Options.FeaturesDir, Sha1ToString(NewII->Sha1),
                           NewII->UniqFeatureSet);
     return true;

diff  --git a/compiler-rt/lib/fuzzer/FuzzerOptions.h b/compiler-rt/lib/fuzzer/FuzzerOptions.h
index 281658dadf04..b17a7474d38f 100644
--- a/compiler-rt/lib/fuzzer/FuzzerOptions.h
+++ b/compiler-rt/lib/fuzzer/FuzzerOptions.h
@@ -49,6 +49,7 @@ struct FuzzingOptions {
   bool Entropic = false;
   size_t EntropicFeatureFrequencyThreshold = 0xFF;
   size_t EntropicNumberOfRarestFeatures = 100;
+  bool EntropicScalePerExecTime = false;
   std::string OutputCorpus;
   std::string ArtifactPrefix = "./";
   std::string ExactArtifactPath;

diff  --git a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp
index 93b54f54c19a..e17fca8fe9ed 100644
--- a/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp
+++ b/compiler-rt/lib/fuzzer/tests/FuzzerUnittest.cpp
@@ -599,7 +599,9 @@ TEST(Corpus, Distribution) {
   for (size_t i = 0; i < N; i++)
     C->AddToCorpus(Unit{static_cast<uint8_t>(i)}, /*NumFeatures*/ 1,
                    /*MayDeleteFile*/ false, /*HasFocusFunction*/ false,
-                   /*ForceAddToCorpus*/ false, /*FeatureSet*/ {}, DFT,
+                   /*ForceAddToCorpus*/ false,
+                   /*TimeOfUnit*/ std::chrono::microseconds(0),
+                   /*FeatureSet*/ {}, DFT,
                    /*BaseII*/ nullptr);
 
   Vector<size_t> Hist(N);
@@ -1101,17 +1103,17 @@ TEST(Entropic, ComputeEnergy) {
   Vector<std::pair<uint32_t, uint16_t>> FeatureFreqs = {{1, 3}, {2, 3}, {3, 3}};
   II->FeatureFreqs = FeatureFreqs;
   II->NumExecutedMutations = 0;
-  II->UpdateEnergy(4);
+  II->UpdateEnergy(4, false, std::chrono::microseconds(0));
   EXPECT_LT(SubAndSquare(II->Energy, 1.450805), Precision);
 
   II->NumExecutedMutations = 9;
-  II->UpdateEnergy(5);
+  II->UpdateEnergy(5, false, std::chrono::microseconds(0));
   EXPECT_LT(SubAndSquare(II->Energy, 1.525496), Precision);
 
   II->FeatureFreqs[0].second++;
   II->FeatureFreqs.push_back(std::pair<uint32_t, uint16_t>(42, 6));
   II->NumExecutedMutations = 20;
-  II->UpdateEnergy(10);
+  II->UpdateEnergy(10, false, std::chrono::microseconds(0));
   EXPECT_LT(SubAndSquare(II->Energy, 1.792831), Precision);
 }
 

diff  --git a/compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp b/compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp
new file mode 100644
index 000000000000..ec3a6906d388
--- /dev/null
+++ b/compiler-rt/test/fuzzer/EntropicScalePerExecTimeTest.cpp
@@ -0,0 +1,33 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Tests whether scaling the Entropic scheduling weight based on input execution
+// time is effective or not. Inputs of size 10 will take at least 100
+// microseconds more than any input of size 1-9. The exec-time-scaled Entropic
+// scheduling policy should favor the input of size 2 in the corpus over the
+// input of size 10 in the corpus, eventually finding the crashing input
+// {0xab, 0xcd} with fewer executions.
+#include <chrono>
+#include <cstdint>
+#include <thread>
+
+static volatile int Sink;
+static volatile int *Nil = nullptr;
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  if (Size > 10)
+    return 0; // Ignore inputs longer than 10 bytes to make the test quicker.
+
+  if (Size == 10) {
+    size_t ExecTimeUSec = 100;
+    std::this_thread::sleep_for(std::chrono::microseconds(ExecTimeUSec));
+
+    Sink = 0; // Size-10 inputs execute a lot slower than the crashing input.
+  }
+
+  if (Size == 2 && Data[0] == 0xab && Data[1] == 0xcd)
+    *Nil = 42; // Crash: write through the null pointer Nil.
+
+  return 0;
+}

diff  --git a/compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test b/compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test
new file mode 100644
index 000000000000..d34550f9c951
--- /dev/null
+++ b/compiler-rt/test/fuzzer/entropic-scale-per-exec-time.test
@@ -0,0 +1,8 @@
+REQUIRES: linux, x86_64
+RUN: %cpp_compiler %S/EntropicScalePerExecTimeTest.cpp -o %t-EntropicScalePerExecTimeTest
+RUN: not %run %t-EntropicScalePerExecTimeTest -entropic=1 -entropic_scale_per_exec_time=1 -seed=1 -runs=100000 -max_len=10
+
+# The following run is kept as a comment for reference. Without
+# -entropic_scale_per_exec_time=1, it should take more runs to find the crash
+# (it takes 126,633 runs).
+# RUN: not %run %t-EntropicScalePerExecTimeTest -entropic=1 -seed=1 -runs=200000 -max_len=10


        


More information about the llvm-commits mailing list