[clang-tools-extra] 1616bd9 - [pseudo] Add fuzzer for the pseudoparser.

Sam McCall via cfe-commits cfe-commits at lists.llvm.org
Fri May 6 00:22:35 PDT 2022


Author: Sam McCall
Date: 2022-05-06T09:22:28+02:00
New Revision: 1616bd9ef4eb46a340a8765eab440f99e9008003

URL: https://github.com/llvm/llvm-project/commit/1616bd9ef4eb46a340a8765eab440f99e9008003
DIFF: https://github.com/llvm/llvm-project/commit/1616bd9ef4eb46a340a8765eab440f99e9008003.diff

LOG: [pseudo] Add fuzzer for the pseudoparser.

As confirmation, running this locally found 2 crashes:
 - trivial: crashes on file with no tokens
 - lexer: hits an assertion failure on bytes: 0x5c,0xa,0x5c,0x1,0x65,0x5c,0xa

Differential Revision: https://reviews.llvm.org/D125037

Added: 
    clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
    clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
    clang-tools-extra/pseudo/fuzzer/Main.cpp
    clang-tools-extra/pseudo/test/fuzzer.cpp

Modified: 
    clang-tools-extra/pseudo/CMakeLists.txt
    clang-tools-extra/pseudo/test/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/pseudo/CMakeLists.txt b/clang-tools-extra/pseudo/CMakeLists.txt
index fe7f7c63fb75..e94737f7f3e8 100644
--- a/clang-tools-extra/pseudo/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/CMakeLists.txt
@@ -2,6 +2,7 @@ include_directories(include)
 include_directories(${CMAKE_CURRENT_BINARY_DIR}/include)
 add_subdirectory(lib)
 add_subdirectory(tool)
+add_subdirectory(fuzzer)
 if(CLANG_INCLUDE_TESTS)
   add_subdirectory(unittests)
   add_subdirectory(test)

diff  --git a/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt b/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
new file mode 100644
index 000000000000..fc583de8b011
--- /dev/null
+++ b/clang-tools-extra/pseudo/fuzzer/CMakeLists.txt
@@ -0,0 +1,14 @@
+set(LLVM_LINK_COMPONENTS # LLVM components this directory's targets link against.
+  FuzzMutate # Main.cpp includes llvm/FuzzMutate/FuzzerCLI.h.
+  Support # Fuzzer.cpp includes llvm/Support/MemoryBuffer.h and raw_ostream.h.
+  )
+
+add_llvm_fuzzer(clang-pseudo-fuzzer # Declares the clang-pseudo-fuzzer target.
+  Fuzzer.cpp # Defines LLVMFuzzerInitialize and LLVMFuzzerTestOneInput.
+  DUMMY_MAIN Main.cpp # Supplies main(); presumably only used when not linking libFuzzer - confirm.
+  )
+
+target_link_libraries(clang-pseudo-fuzzer
+  PRIVATE
+  clangPseudo # Presumably the library behind the clang-pseudo/*.h headers - confirm.
+  )

diff  --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
new file mode 100644
index 000000000000..4907fc9f9c04
--- /dev/null
+++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp
@@ -0,0 +1,106 @@
+//===-- Fuzzer.cpp - Fuzz the pseudoparser --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang-pseudo/DirectiveTree.h"
+#include "clang-pseudo/Forest.h"
+#include "clang-pseudo/GLR.h"
+#include "clang-pseudo/Grammar.h"
+#include "clang-pseudo/LRTable.h"
+#include "clang-pseudo/Token.h"
+#include "clang/Basic/LangOptions.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+namespace clang {
+namespace pseudo {
+namespace {
+
+class Fuzzer { // Loads a grammar once, then pseudoparses one input per call.
+  clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();
+  std::unique_ptr<Grammar> G; // Grammar parsed from the BNF file.
+  LRTable T; // SLR table built from *G.
+  bool Print; // When true, each parse forest is dumped to stdout.
+
+public:
+  Fuzzer(llvm::StringRef GrammarPath, bool Print) : Print(Print) { // Exits with status 1 on any error.
+    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
+        llvm::MemoryBuffer::getFile(GrammarPath);
+    if (std::error_code EC = GrammarText.getError()) { // Grammar file could not be read.
+      llvm::errs() << "Error: can't read grammar file '" << GrammarPath
+                   << "': " << EC.message() << "\n";
+      std::exit(1);
+    }
+    std::vector<std::string> Diags; // Filled by parseBNF.
+    G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);
+    if (!Diags.empty()) { // Any grammar diagnostic is treated as fatal.
+      for (const auto &Diag : Diags)
+        llvm::errs() << Diag << "\n";
+      std::exit(1);
+    }
+    T = LRTable::buildSLR(*G);
+  }
+
+  void operator()(llvm::StringRef Code) { // Lex, choose conditional branches, cook, strip comments, GLR-parse.
+    std::string CodeStr = Code.str(); // Must be null-terminated.
+    auto RawStream = lex(CodeStr, LangOpts);
+    auto DirectiveStructure = DirectiveTree::parse(RawStream);
+    clang::pseudo::chooseConditionalBranches(DirectiveStructure, RawStream);
+    // FIXME: strip preprocessor directives
+    auto ParseableStream =
+        clang::pseudo::stripComments(cook(RawStream, LangOpts));
+
+    clang::pseudo::ForestArena Arena; // Arena and GSS are fresh for every input.
+    clang::pseudo::GSS GSS;
+    auto &Root = glrParse(ParseableStream,
+                          clang::pseudo::ParseParams{*G, T, Arena, GSS});
+    if (Print) // Off unless the -print flag was given.
+      llvm::outs() << Root.dumpRecursive(*G);
+  }
+};
+
+Fuzzer *Fuzz = nullptr;
+
+} // namespace
+} // namespace pseudo
+} // namespace clang
+
+extern "C" {
+
+// Set up the fuzzer from command line flags:
+//  -grammar=<file> (required) - path to cxx.bnf
+//  -print                     - used for testing the fuzzer
+int LLVMFuzzerInitialize(int *Argc, char ***Argv) { // Returns 0; exits with status 1 if -grammar is missing.
+  llvm::StringRef GrammarFile;
+  bool PrintForest = false;
+  auto ConsumeArg = [&](llvm::StringRef Arg) -> bool { // True iff Arg is one of our flags.
+    if (Arg.consume_front("-grammar=")) {
+      GrammarFile = Arg; // Refers to the argv storage, not a copy.
+      return true;
+    }
+    if (Arg != "-print")
+      return false; // Not ours: leave it in argv for the fuzzing driver.
+    PrintForest = true;
+    return true;
+  };
+  *Argc = std::remove_if(*Argv + 1, *Argv + *Argc, ConsumeArg) - *Argv; // Drop consumed flags; argv[0] is skipped.
+
+  if (GrammarFile.empty()) {
+    llvm::errs() << "Fuzzer needs -grammar=/path/to/cxx.bnf\n";
+    std::exit(1);
+  }
+  clang::pseudo::Fuzz = new clang::pseudo::Fuzzer(GrammarFile, PrintForest); // Never deleted in this file.
+  return 0;
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { // Fuzz hook; Data is read-only. Always returns 0.
+  (*clang::pseudo::Fuzz)({reinterpret_cast<const char *>(Data), Size}); // Fuzz is set by LLVMFuzzerInitialize.
+  return 0;
+}
+}

diff  --git a/clang-tools-extra/pseudo/fuzzer/Main.cpp b/clang-tools-extra/pseudo/fuzzer/Main.cpp
new file mode 100644
index 000000000000..542a3007a399
--- /dev/null
+++ b/clang-tools-extra/pseudo/fuzzer/Main.cpp
@@ -0,0 +1,16 @@
+//===--- Main.cpp - Entry point to sanity check the fuzzer ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/FuzzMutate/FuzzerCLI.h"
+
+extern "C" int LLVMFuzzerInitialize(int *, char ***);
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *, size_t);
+int main(int Argc, char **Argv) { // Passes the command line and both fuzzer hooks to llvm::runFuzzerOnInputs.
+  return llvm::runFuzzerOnInputs(Argc, Argv, &LLVMFuzzerTestOneInput,
+                                 &LLVMFuzzerInitialize);
+}

diff  --git a/clang-tools-extra/pseudo/test/CMakeLists.txt b/clang-tools-extra/pseudo/test/CMakeLists.txt
index 250186cb78c1..712527f78140 100644
--- a/clang-tools-extra/pseudo/test/CMakeLists.txt
+++ b/clang-tools-extra/pseudo/test/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(CLANG_PSEUDO_TEST_DEPS
   clang-pseudo
+  clang-pseudo-fuzzer
   ClangPseudoTests
   )
 

diff  --git a/clang-tools-extra/pseudo/test/fuzzer.cpp b/clang-tools-extra/pseudo/test/fuzzer.cpp
new file mode 100644
index 000000000000..6f4d093ed2ad
--- /dev/null
+++ b/clang-tools-extra/pseudo/test/fuzzer.cpp
@@ -0,0 +1,4 @@
+// RUN: clang-pseudo-fuzzer -grammar=%cxx-bnf-file -print %s | FileCheck %s
+int x;
+// CHECK: translation-unit := declaration-seq
+// CHECK: simple-type-specifier := INT


        


More information about the cfe-commits mailing list