[clang] a2f2dfd - [clang-fuzzer] Add a tiny tool to generate a fuzzing dictionary for clang

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Mon May 9 09:32:33 PDT 2022


Author: Sam McCall
Date: 2022-05-09T18:25:29+02:00
New Revision: a2f2dfde48ac6e337a5cf1dfd54a766371627b75

URL: https://github.com/llvm/llvm-project/commit/a2f2dfde48ac6e337a5cf1dfd54a766371627b75
DIFF: https://github.com/llvm/llvm-project/commit/a2f2dfde48ac6e337a5cf1dfd54a766371627b75.diff

LOG: [clang-fuzzer] Add a tiny tool to generate a fuzzing dictionary for clang

It should be useful for clang-fuzzer itself, though my own motivation is
to use this in fuzzing clang-pseudo (clang-tools-extra/pseudo/fuzzer).

Differential Revision: https://reviews.llvm.org/D125166

Added: 
    clang/test/Misc/fuzzer-dictionary.test
    clang/tools/clang-fuzzer/dictionary/CMakeLists.txt
    clang/tools/clang-fuzzer/dictionary/dictionary.c

Modified: 
    clang/test/CMakeLists.txt
    clang/tools/clang-fuzzer/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt
index 9e8fa9c5c4388..5b604b2a3eeba 100644
--- a/clang/test/CMakeLists.txt
+++ b/clang/test/CMakeLists.txt
@@ -58,6 +58,7 @@ list(APPEND CLANG_TEST_DEPS
   apinotes-test
   c-index-test
   clang
+  clang-fuzzer-dictionary
   clang-resource-headers
   clang-format
   clang-tblgen

diff  --git a/clang/test/Misc/fuzzer-dictionary.test b/clang/test/Misc/fuzzer-dictionary.test
new file mode 100644
index 0000000000000..2c97add8aea4e
--- /dev/null
+++ b/clang/test/Misc/fuzzer-dictionary.test
@@ -0,0 +1,4 @@
+RUN: clang-fuzzer-dictionary | FileCheck %s
+CHECK-DAG: or="or"
+CHECK-DAG: catch="catch"
+CHECK-DAG: rawstart="R\x22("

diff  --git a/clang/tools/clang-fuzzer/CMakeLists.txt b/clang/tools/clang-fuzzer/CMakeLists.txt
index 80ae9b611f65b..e68ed8bbcb069 100644
--- a/clang/tools/clang-fuzzer/CMakeLists.txt
+++ b/clang/tools/clang-fuzzer/CMakeLists.txt
@@ -109,6 +109,7 @@ endif()
 
 add_clang_subdirectory(handle-cxx)
 add_clang_subdirectory(handle-llvm)
+add_clang_subdirectory(dictionary)
 
 add_clang_executable(clang-fuzzer
   EXCLUDE_FROM_ALL

diff  --git a/clang/tools/clang-fuzzer/dictionary/CMakeLists.txt b/clang/tools/clang-fuzzer/dictionary/CMakeLists.txt
new file mode 100644
index 0000000000000..0e9a9a7bd0a37
--- /dev/null
+++ b/clang/tools/clang-fuzzer/dictionary/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_clang_executable(clang-fuzzer-dictionary dictionary.c)
+

diff  --git a/clang/tools/clang-fuzzer/dictionary/dictionary.c b/clang/tools/clang-fuzzer/dictionary/dictionary.c
new file mode 100644
index 0000000000000..90490477f70c8
--- /dev/null
+++ b/clang/tools/clang-fuzzer/dictionary/dictionary.c
@@ -0,0 +1,57 @@
+//===-- dictionary.c - Generate fuzzing dictionary for clang --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This binary emits a fuzzing dictionary describing strings that are
+// significant to the clang parser: keywords and other tokens.
+//
+// The dictionary can be used by a fuzzer to reach interesting parser states
+// much more quickly.
+//
+// The output is a single-file dictionary supported by libFuzzer and AFL:
+// https://llvm.org/docs/LibFuzzer.html#dictionaries
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdio.h>
+
+static void emit(const char *Name, const char *Spelling) { // Print one dictionary line to stdout: Name, an equals sign, then Spelling in quotes with escaping.
+  static char Hex[] = "0123456789abcdef"; // Lowercase hex digits, indexed by nibble value.
+
+  printf("%s=\"", Name); // Name is written as-is (no escaping), followed by the opening quote.
+  unsigned char C; // Unsigned, so a byte is never negative in the comparison and shift below.
+  while ((C = *Spelling++)) { // Walk Spelling one byte at a time until the NUL terminator.
+    if (C < 32 || C == '"' || C == '\\') // Escape bytes below 32, the double quote, and the backslash.
+      printf("\\x%c%c", Hex[C>>4], Hex[C%16]); // Written as backslash-x plus two hex digits: high nibble, then low nibble.
+    else
+      printf("%c", C); // Any other byte, including 127 and above, is copied through unchanged.
+  }
+  printf("\"\n"); // Closing quote, then end of line.
+}
+
+int main(int argc, char **argv) { // Writes every dictionary entry to stdout via emit(); argc and argv are not used.
+#define PUNCTUATOR(Name, Spelling) emit(#Name, Spelling); // Entry name is the stringized Name; the value is the Spelling argument as given.
+#define KEYWORD(Name, Criteria) emit(#Name, #Name); // Stringized Name is both the entry name and the value; Criteria is ignored.
+#define PPKEYWORD(Name) emit(#Name, #Name); // Same scheme: stringized Name is both name and value.
+#define CXX_KEYWORD_OPERATOR(Name, Equivalent) emit(#Name, #Name); // Stringized Name is both name and value; Equivalent is ignored.
+#define OBJC_AT_KEYWORD(Name) emit(#Name, #Name); // Stringized Name is both name and value; nothing is prepended to it here.
+#define ALIAS(Spelling, Equivalent, Criteria) emit(Spelling, Spelling); // Spelling is passed without stringizing (presumably already a string literal in the .def file -- confirm); the other arguments are ignored.
+#include "clang/Basic/TokenKinds.def" // NOTE(review): presumably invokes the macros above once per token kind, so each entry expands to an emit() call at this point -- confirm against the .def file.
+  // Some other sub-token chunks significant to the lexer.
+  emit("ucn16", "\\u0000"); // Value: backslash, lowercase u, four zero digits.
+  emit("ucn32", "\\U00000000"); // Value: backslash, uppercase U, eight zero digits.
+  emit("rawstart", "R\"("); // Value: R, double quote, open paren; emit() escapes the quote in the output.
+  emit("rawend", ")\""); // Value: close paren, double quote.
+  emit("quote", "\""); // Value: a lone double quote.
+  emit("squote", "'"); // Value: a lone single quote, which emit() prints unescaped.
+  emit("u8quote", "u8\""); // Value: u8 followed by a double quote.
+  emit("u16quote", "u\""); // Value: lowercase u followed by a double quote.
+  emit("u32quote", "U\""); // Value: uppercase U followed by a double quote.
+  emit("esc_nl", "\\\n"); // Value: backslash followed by a newline; emit() escapes both bytes.
+  emit("hex", "0x"); // Value: the two characters 0 and x.
+}
+


        


More information about the cfe-commits mailing list