[llvm] r236772 - [lib/Fuzzer] change the way we use taint information for fuzzing. Now, we run a single unit and collect suggested mutations based on tracing+taint data, then apply the suggested mutations one by one. The previous scheme was slower and more complex.

Kostya Serebryany kcc at google.com
Thu May 7 14:02:11 PDT 2015


Author: kcc
Date: Thu May  7 16:02:11 2015
New Revision: 236772

URL: http://llvm.org/viewvc/llvm-project?rev=236772&view=rev
Log:
[lib/Fuzzer] change the way we use taint information for fuzzing. Now, we run a single unit and collect suggested mutations based on tracing+taint data, then apply the suggested mutations one by one. The previous scheme was slower and more complex.

Modified:
    llvm/trunk/lib/Fuzzer/FuzzerDFSan.cpp
    llvm/trunk/lib/Fuzzer/FuzzerInternal.h
    llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp
    llvm/trunk/lib/Fuzzer/test/fuzzer.test

Modified: llvm/trunk/lib/Fuzzer/FuzzerDFSan.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerDFSan.cpp?rev=236772&r1=236771&r2=236772&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerDFSan.cpp (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerDFSan.cpp Thu May  7 16:02:11 2015
@@ -68,6 +68,7 @@
 #include "FuzzerInternal.h"
 #include <sanitizer/dfsan_interface.h>
 
+#include <algorithm>
 #include <cstring>
 #include <iostream>
 #include <unordered_map>
@@ -157,26 +158,39 @@ std::ostream &operator<<(std::ostream &o
   return os << "[" << LR.Beg << "," << LR.End << ")";
 }
 
+// For now, very simple: put Size bytes of Data at position Pos.
+struct TraceBasedMutation {
+  size_t Pos;
+  size_t Size;
+  uint64_t Data;
+};
+
 class DFSanState {
  public:
    DFSanState(const fuzzer::Fuzzer::FuzzingOptions &Options)
        : Options(Options) {}
 
-  struct CmpSiteInfo {
-    size_t ResCounters[2] = {0, 0};
-    size_t CmpSize = 0;
-    LabelRange LR;
-    std::unordered_map<uint64_t, size_t> CountedConstants;
-  };
-
   LabelRange GetLabelRange(dfsan_label L);
   void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
                         uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
                         dfsan_label L2);
-  bool Mutate(fuzzer::Unit *U);
+
+  void StartTraceRecording() {
+    RecordingTraces = true;
+    Mutations.clear();
+  }
+
+  size_t StopTraceRecording() {
+    RecordingTraces = false;
+    std::random_shuffle(Mutations.begin(), Mutations.end());
+    return Mutations.size();
+  }
+
+  void ApplyTraceBasedMutation(size_t Idx, fuzzer::Unit *U);
 
  private:
-  std::unordered_map<uintptr_t, CmpSiteInfo> PcToCmpSiteInfoMap;
+  bool RecordingTraces = false;
+  std::vector<TraceBasedMutation> Mutations;
   LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)] = {};
   const fuzzer::Fuzzer::FuzzingOptions &Options;
 };
@@ -191,63 +205,69 @@ LabelRange DFSanState::GetLabelRange(dfs
   return LR = LabelRange::Singleton(LI);
 }
 
+void DFSanState::ApplyTraceBasedMutation(size_t Idx, fuzzer::Unit *U) {
+  assert(Idx < Mutations.size());
+  auto &M = Mutations[Idx];
+  if (Options.Verbosity >= 3)
+    std::cerr << "TBM " << M.Pos << " " << M.Size << " " << M.Data << "\n";
+  if (M.Pos + M.Size > U->size()) return;
+  memcpy(U->data() + M.Pos, &M.Data, M.Size);
+}
+
 void DFSanState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
                                   uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
                                   dfsan_label L2) {
+  if (!RecordingTraces) return;
   if (L1 == 0 && L2 == 0)
     return;  // Not actionable.
   if (L1 != 0 && L2 != 0)
     return;  // Probably still actionable.
   bool Res = ComputeCmp(CmpSize, CmpType, Arg1, Arg2);
-  CmpSiteInfo &CSI = PcToCmpSiteInfoMap[PC];
-  CSI.CmpSize = CmpSize;
-  CSI.LR.Join(GetLabelRange(L1)).Join(GetLabelRange(L2));
-  if (!L1) CSI.CountedConstants[Arg1]++;
-  if (!L2) CSI.CountedConstants[Arg2]++;
-  size_t Counter = CSI.ResCounters[Res]++;
-
-  if (Options.Verbosity >= 2  &&
-      (Counter & (Counter - 1)) == 0 &&
-      CSI.ResCounters[!Res] == 0)
+  uint64_t Data = L1 ? Arg2 : Arg1;
+  LabelRange LR = L1 ? GetLabelRange(L1) : GetLabelRange(L2);
+
+  for (size_t Pos = LR.Beg; Pos + CmpSize <= LR.End; Pos++) {
+    Mutations.push_back({Pos, CmpSize, Data});
+    Mutations.push_back({Pos, CmpSize, Data + 1});
+    Mutations.push_back({Pos, CmpSize, Data - 1});
+  }
+
+  if (CmpSize > LR.End - LR.Beg)
+    Mutations.push_back({LR.Beg, (unsigned)(LR.End - LR.Beg), Data});
+
+
+  if (Options.Verbosity >= 3)
     std::cerr << "DFSAN:"
               << " PC " << std::hex << PC << std::dec
               << " S " << CmpSize
               << " T " << CmpType
               << " A1 " << Arg1 << " A2 " << Arg2 << " R " << Res
-              << " L" << L1 << GetLabelRange(L1)
-              << " L" << L2 << GetLabelRange(L2)
-              << " LR " << CSI.LR
+              << " L" << L1
+              << " L" << L2
+              << " R"  << LR
+              << " MU " << Mutations.size()
               << "\n";
 }
 
-bool DFSanState::Mutate(fuzzer::Unit *U) {
-  for (auto &PCToCmp : PcToCmpSiteInfoMap) {
-    auto &CSI = PCToCmp.second;
-    if (CSI.ResCounters[0] * CSI.ResCounters[1] != 0) continue;
-    if (CSI.ResCounters[0] + CSI.ResCounters[1] < 1000) continue;
-    if (CSI.CountedConstants.size() != 1) continue;
-    uintptr_t C = CSI.CountedConstants.begin()->first;
-    if (U->size() >= CSI.CmpSize) {
-      size_t RangeSize = CSI.LR.End - CSI.LR.Beg;
-      size_t Idx = CSI.LR.Beg + rand() % RangeSize;
-      if (Idx + CSI.CmpSize > U->size()) continue;
-      C += rand() % 5 - 2;
-      memcpy(U->data() + Idx, &C, CSI.CmpSize);
-      return true;
-    }
-  }
-  return false;
-}
-
 static DFSanState *DFSan;
 
 }  // namespace
 
 namespace fuzzer {
 
-bool Fuzzer::MutateWithDFSan(Unit *U) {
-  if (!&dfsan_create_label || !DFSan) return false;
-  return DFSan->Mutate(U);
+void Fuzzer::StartTraceRecording() {
+  if (!DFSan) return;
+  DFSan->StartTraceRecording();
+}
+
+size_t Fuzzer::StopTraceRecording() {
+  if (!DFSan) return 0;
+  return DFSan->StopTraceRecording();
+}
+
+void Fuzzer::ApplyTraceBasedMutation(size_t Idx, Unit *U) {
+  assert(DFSan);
+  DFSan->ApplyTraceBasedMutation(Idx, U);
 }
 
 void Fuzzer::InitializeDFSan() {
@@ -279,7 +299,7 @@ void dfsan_weak_hook_memcmp(void *caller
                             size_t n, dfsan_label s1_label,
                             dfsan_label s2_label, dfsan_label n_label) {
   uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
-  uint64_t S1, S2;
+  uint64_t S1 = 0, S2 = 0;
   // Simplification: handle only first 8 bytes.
   memcpy(&S1, s1, std::min(n, sizeof(S1)));
   memcpy(&S2, s2, std::min(n, sizeof(S2)));

Modified: llvm/trunk/lib/Fuzzer/FuzzerInternal.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerInternal.h?rev=236772&r1=236771&r2=236772&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerInternal.h (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerInternal.h Thu May  7 16:02:11 2015
@@ -94,10 +94,20 @@ class Fuzzer {
   size_t RunOneMaximizeCoveragePairs(const Unit &U);
   void WriteToOutputCorpus(const Unit &U);
   void WriteToCrash(const Unit &U, const char *Prefix);
-  bool MutateWithDFSan(Unit *U);
   void PrintStats(const char *Where, size_t Cov, const char *End = "\n");
   void PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter = "");
 
+  // Trace-based fuzzing: we run a unit with some kind of tracing
+  // enabled and record potentially useful mutations. Then
+  // we apply these mutations one by one to the unit and run it again.
+
+  // Start tracing; forget all previously proposed mutations.
+  void StartTraceRecording();
+  // Stop tracing and return the number of proposed mutations.
+  size_t StopTraceRecording();
+  // Apply Idx-th trace-based mutation to U.
+  void ApplyTraceBasedMutation(size_t Idx, Unit *U);
+
   void SetDeathCallback();
   static void StaticDeathCallback();
   void DeathCallback();

Modified: llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp?rev=236772&r1=236771&r2=236772&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerLoop.cpp Thu May  7 16:02:11 2015
@@ -285,9 +285,14 @@ void Fuzzer::ReportNewCoverage(size_t Ne
 
 void Fuzzer::MutateAndTestOne(Unit *U) {
   for (int i = 0; i < Options.MutateDepth; i++) {
-    MutateWithDFSan(U);
+    StartTraceRecording();
     Mutate(U, Options.MaxLen);
     RunOneAndUpdateCorpus(*U);
+    size_t NumTraceBasedMutations = StopTraceRecording();
+    for (size_t j = 0; j < NumTraceBasedMutations; j++) {
+        ApplyTraceBasedMutation(j, U);
+        RunOneAndUpdateCorpus(*U);
+    }
   }
 }
 

Modified: llvm/trunk/lib/Fuzzer/test/fuzzer.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/test/fuzzer.test?rev=236772&r1=236771&r2=236772&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/test/fuzzer.test (original)
+++ llvm/trunk/lib/Fuzzer/test/fuzzer.test Thu May  7 16:02:11 2015
@@ -19,9 +19,9 @@ RUN: not ./LLVMFuzzer-FourIndependentBra
 
 RUN: not ./LLVMFuzzer-CounterTest -use_counters=1 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s
 
-RUN: not ./LLVMFuzzer-DFSanSimpleCmpTest -seed=1 -timeout=15 2>&1 | FileCheck %s
+RUN: not ./LLVMFuzzer-DFSanSimpleCmpTest -seed=1 -runs=1000000 -timeout=5 2>&1 | FileCheck %s
 
-RUN: not ./LLVMFuzzer-DFSanMemcmpTest -seed=1 -timeout=15 2>&1 | FileCheck %s
+RUN: not ./LLVMFuzzer-DFSanMemcmpTest -seed=1 -runs=100 -timeout=5 2>&1 | FileCheck %s
 
 RUN: not ./LLVMFuzzer-CxxTokensTest -seed=1 -timeout=15 -tokens=%S/../cxx_fuzzer_tokens.txt 2>&1 | FileCheck %s
 





More information about the llvm-commits mailing list