[llvm] r246800 - [libFuzzer] adding a parser for AFL-style dictionaries + tests.

Thu Sep 3 13:23:47 PDT 2015

Author: kcc
Date: Thu Sep  3 15:23:46 2015
New Revision: 246800

URL: http://llvm.org/viewvc/llvm-project?rev=246800&view=rev
Log:
[libFuzzer] adding a parser for AFL-style dictionaries + tests.

Modified:
    llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp
    llvm/trunk/lib/Fuzzer/FuzzerFlags.def
    llvm/trunk/lib/Fuzzer/FuzzerInternal.h
    llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp
    llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp

Modified: llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp?rev=246800&r1=246799&r2=246800&view=diff
==============================================================================

--- llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerDriver.cpp Thu Sep  3 15:23:46 2015
@@ -256,6 +256,13 @@ int FuzzerDriver(int argc, char **argv,
   if (Flags.apply_tokens)
     return ApplyTokens(F, Flags.apply_tokens);
 
+  if (Flags.dict)
+    if (!ParseDictionaryFile(FileToString(Flags.dict), &Options.Dictionary))
+      return 1;
+
+  if (Flags.verbosity > 0 && !Options.Dictionary.empty())
+    Printf("Dictionary: %zd entries\n", Options.Dictionary.size());
+
   unsigned Seed = Flags.seed;
   // Initialize Seed.
   if (Seed == 0)

Modified: llvm/trunk/lib/Fuzzer/FuzzerFlags.def
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerFlags.def?rev=246800&r1=246799&r2=246800&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerFlags.def (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerFlags.def Thu Sep  3 15:23:46 2015
@@ -63,6 +63,7 @@ FUZZER_FLAG_INT(report_slow_units, 10,
     "Report slowest units if they run for more than this number of seconds.")
 FUZZER_FLAG_INT(only_ascii, 0,
                 "If 1, generate only ASCII (isprint+isspace) inputs.")
+FUZZER_FLAG_STRING(dict, "Experimental. Use the dictionary file.")
 FUZZER_FLAG_INT(tbm_depth, 5, "Apply at most this number of consecutive"
                                "trace-based-mutations (tbm).")
 FUZZER_FLAG_INT(tbm_width, 5, "Apply at most this number of independent"

Modified: llvm/trunk/lib/Fuzzer/FuzzerInternal.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerInternal.h?rev=246800&r1=246799&r2=246800&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerInternal.h (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerInternal.h Thu Sep  3 15:23:46 2015
@@ -57,6 +57,16 @@ bool IsASCII(const Unit &U);
 
 int NumberOfCpuCores();
 
+// Dictionary.
+
+// Parses one dictionary entry: a quoted value, e.g. "abc" or name="abc".
+// If successful, writes the entry to *U and returns true;
+// otherwise returns false.
+bool ParseOneDictionaryEntry(const std::string &Str, Unit *U);
+// Parses the text of a dictionary file and fills Units. Returns true iff
+// Text is non-empty and all entry lines were parsed successfully.
+bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units);
+
 class Fuzzer {
  public:
   struct FuzzingOptions {
@@ -80,6 +90,7 @@ class Fuzzer {
     std::string OutputCorpus;
     std::string SyncCommand;
     std::vector<std::string> Tokens;
+    std::vector<Unit> Dictionary;
   };
   Fuzzer(UserSuppliedFuzzer &USF, FuzzingOptions Options);
   void AddToCorpus(const Unit &U) { Corpus.push_back(U); }

Modified: llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp?rev=246800&r1=246799&r2=246800&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp (original)
+++ llvm/trunk/lib/Fuzzer/FuzzerUtil.cpp Thu Sep  3 15:23:46 2015
@@ -16,6 +16,7 @@
 #include <cassert>
 #include <cstring>
 #include <signal.h>
+#include <sstream>
 #include <unistd.h>
 
 namespace fuzzer {
@@ -92,4 +93,77 @@ bool IsASCII(const Unit &U) {
   return true;
 }
 
+bool ParseOneDictionaryEntry(const std::string &Str, Unit *U) {
+  U->clear();  // Parses [name=]"value" into *U; *U is reset even on failure.
+  if (Str.empty()) return false;
+  size_t L = 0, R = Str.size() - 1;  // We are parsing the range [L,R].
+  // Skip spaces from both sides (uint8_t cast: isspace(negative) is UB).
+  while (L < R && isspace((uint8_t)Str[L])) L++;
+  while (R > L && isspace((uint8_t)Str[R])) R--;
+  if (R - L < 2) return false;  // Need at least 3 chars, e.g. "a".
+  // Check the closing "
+  if (Str[R] != '"') return false;
+  R--;  // [L,R] now ends at the last byte of the quoted value.
+  // Find the opening " (anything before it, e.g. a name=, is ignored).
+  while (L < R && Str[L] != '"') L++;
+  if (L >= R) return false;  // No opening quote, or the value is empty.
+  assert(Str[L] == '\"');
+  L++;
+  assert(L <= R);
+  for (size_t Pos = L; Pos <= R; Pos++) {
+    uint8_t V = (uint8_t)Str[Pos];
+    if (!isprint(V) && !isspace(V)) return false;  // Raw bytes need \xAB.
+    if (V =='\\') {
+      // Handle '\\' and '\"': emit the escaped character itself.
+      if (Pos + 1 <= R && (Str[Pos + 1] == '\\' || Str[Pos + 1] == '"')) {
+        U->push_back(Str[Pos + 1]);
+        Pos++;
+        continue;
+      }
+      // Handle '\xAB' (exactly two hex digits).
+      if (Pos + 3 <= R && Str[Pos + 1] == 'x' &&
+          isxdigit((uint8_t)Str[Pos + 2]) && isxdigit((uint8_t)Str[Pos + 3])) {
+        char Hex[] = "0xAA";  // Template; both A's are overwritten below.
+        Hex[2] = Str[Pos + 2];
+        Hex[3] = Str[Pos + 3];
+        U->push_back(strtol(Hex, nullptr, 16));
+        Pos += 3;
+        continue;
+      }
+      return false;  // Invalid escape.
+    } else {
+      // Any other character.
+      U->push_back(V);
+    }
+  }
+  return true;
+}
+
+bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units) {
+  if (Text.empty()) {  // Empty text is reported as an error, not as success.
+    Printf("ParseDictionaryFile: file does not exist or is empty\n");
+    return false;
+  }
+  std::istringstream ISS(Text);
+  Units->clear();  // Previous contents of *Units are always discarded.
+  Unit U;
+  int LineNo = 0;  // 1-based number of the current line, for diagnostics.
+  std::string S;
+  while (std::getline(ISS, S, '\n')) {
+    LineNo++;
+    size_t Pos = 0;  // Cast below: isspace() on a negative char is UB.
+    while (Pos < S.size() && isspace((uint8_t)S[Pos])) Pos++;  // Skip spaces.
+    if (Pos == S.size()) continue;  // Empty line.
+    if (S[Pos] == '#') continue;  // Comment line.
+    if (ParseOneDictionaryEntry(S, &U)) {
+      Units->push_back(U);
+    } else {
+      Printf("ParseDictionaryFile: error in line %d\n\t\t%s\n", LineNo,
+             S.c_str());
+      return false;  // Stop at the first bad line; *Units keeps earlier ones.
+    }
+  }
+  return true;
+}
+
 }  // namespace fuzzer

Modified: llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp?rev=246800&r1=246799&r2=246800&view=diff
==============================================================================
--- llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp (original)
+++ llvm/trunk/lib/Fuzzer/test/FuzzerUnittest.cpp Thu Sep  3 15:23:46 2015
@@ -215,3 +215,51 @@ void TestShuffleBytes(Mutator M, int Num
 
 TEST(FuzzerMutate, ShuffleBytes1) { TestShuffleBytes(Mutate_ShuffleBytes, 1 << 15); }
 TEST(FuzzerMutate, ShuffleBytes2) { TestShuffleBytes(Mutate, 1 << 16); }
+
+TEST(FuzzerDictionary, ParseOneDictionaryEntry) {
+  Unit Entry;  // Receives the decoded bytes of each parsed entry.
+  EXPECT_FALSE(ParseOneDictionaryEntry("", &Entry));       // Empty input.
+  EXPECT_FALSE(ParseOneDictionaryEntry(" ", &Entry));      // Only spaces.
+  EXPECT_FALSE(ParseOneDictionaryEntry("\t  ", &Entry));   // Only spaces.
+  EXPECT_FALSE(ParseOneDictionaryEntry("  \" ", &Entry));  // Lone quote.
+  EXPECT_FALSE(ParseOneDictionaryEntry("  zz\" ", &Entry));  // No opening ".
+  EXPECT_FALSE(ParseOneDictionaryEntry("  \"zz ", &Entry));  // No closing ".
+  EXPECT_FALSE(ParseOneDictionaryEntry("  \"\" ", &Entry));  // Empty value.
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"a\"", &Entry));
+  EXPECT_EQ(Entry, Unit({'a'}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"abc\"", &Entry));
+  EXPECT_EQ(Entry, Unit({'a', 'b', 'c'}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("abc=\"abc\"", &Entry));  // With name=.
+  EXPECT_EQ(Entry, Unit({'a', 'b', 'c'}));
+  EXPECT_FALSE(ParseOneDictionaryEntry("\"\\\"", &Entry));  // Bare backslash.
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"\\\\\"", &Entry));  // Escaped one.
+  EXPECT_EQ(Entry, Unit({'\\'}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"\\xAB\"", &Entry));  // Hex escape.
+  EXPECT_EQ(Entry, Unit({0xAB}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"\\xABz\\xDE\"", &Entry));
+  EXPECT_EQ(Entry, Unit({0xAB, 'z', 0xDE}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"#\"", &Entry));  // Quoted '#'.
+  EXPECT_EQ(Entry, Unit({'#'}));
+  EXPECT_TRUE(ParseOneDictionaryEntry("\"\\\"\"", &Entry));  // Escaped quote.
+  EXPECT_EQ(Entry, Unit({'"'}));
+}
+
+TEST(FuzzerDictionary, ParseDictionaryFile) {
+  std::vector<Unit> Parsed;  // Refilled by every ParseDictionaryFile call.
+  EXPECT_FALSE(ParseDictionaryFile("zzz\n", &Parsed));  // Not a valid entry.
+  EXPECT_FALSE(ParseDictionaryFile("", &Parsed));       // Empty text.
+  EXPECT_TRUE(ParseDictionaryFile("\n", &Parsed));      // Blank line only.
+  EXPECT_EQ(Parsed.size(), 0U);
+  EXPECT_TRUE(ParseDictionaryFile("#zzzz a b c d\n", &Parsed));  // Comment.
+  EXPECT_EQ(Parsed.size(), 0U);
+  EXPECT_TRUE(ParseDictionaryFile(" #zzzz\n", &Parsed));  // Indented comment.
+  EXPECT_EQ(Parsed.size(), 0U);
+  EXPECT_TRUE(ParseDictionaryFile("  #zzzz\n", &Parsed));
+  EXPECT_EQ(Parsed.size(), 0U);
+  EXPECT_TRUE(ParseDictionaryFile("  #zzzz\naaa=\"aa\"", &Parsed));
+  EXPECT_EQ(Parsed, std::vector<Unit>({Unit({'a', 'a'})}));
+  EXPECT_TRUE(
+      ParseDictionaryFile("  #zzzz\naaa=\"aa\"\n\nabc=\"abc\"", &Parsed));
+  EXPECT_EQ(Parsed,
+            std::vector<Unit>({Unit({'a', 'a'}), Unit({'a', 'b', 'c'})}));
+}