[llvm] r296323 - llvm-mc-fuzzer: add support for assembly

Brian Cain via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 26 22:22:17 PST 2017


Author: bcain
Date: Mon Feb 27 00:22:17 2017
New Revision: 296323

URL: http://llvm.org/viewvc/llvm-project?rev=296323&view=rev
Log:
llvm-mc-fuzzer: add support for assembly

This creates an llvm-mc-disassemble-fuzzer from the existing llvm-mc-fuzzer
and finishes the assemble support in llvm-mc-assemble-fuzzer.


Added:
    llvm/trunk/tools/llvm-mc-assemble-fuzzer/
    llvm/trunk/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt
    llvm/trunk/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
    llvm/trunk/tools/llvm-mc-disassemble-fuzzer/
    llvm/trunk/tools/llvm-mc-disassemble-fuzzer/CMakeLists.txt
    llvm/trunk/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp
Removed:
    llvm/trunk/tools/llvm-mc-fuzzer/

Added: llvm/trunk/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt?rev=296323&view=auto
==============================================================================
--- llvm/trunk/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt (added)
+++ llvm/trunk/tools/llvm-mc-assemble-fuzzer/CMakeLists.txt Mon Feb 27 00:22:17 2017
@@ -0,0 +1,19 @@
+if( LLVM_USE_SANITIZE_COVERAGE )  # Everything below is skipped unless this variable is set.
+  include_directories(BEFORE
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../lib/Fuzzer)  # Presumably where FuzzerInterface.h lives; confirm.
+
+  set(LLVM_LINK_COMPONENTS  # Read by add_llvm_tool (defined elsewhere); presumably the LLVM libraries to link.
+      AllTargetsAsmPrinters
+      AllTargetsAsmParsers
+      AllTargetsDescs
+      AllTargetsInfos
+      MC
+      MCParser
+      Support
+      )
+  add_llvm_tool(llvm-mc-assemble-fuzzer  # The tool is built from the single source file listed next.
+                llvm-mc-assemble-fuzzer.cpp)
+  target_link_libraries(llvm-mc-assemble-fuzzer  # Additionally link the LLVMFuzzer library.
+                        LLVMFuzzer
+                        )
+endif()

Added: llvm/trunk/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp?rev=296323&view=auto
==============================================================================
--- llvm/trunk/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp (added)
+++ llvm/trunk/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp Mon Feb 27 00:22:17 2017
@@ -0,0 +1,313 @@
+//===--- llvm-mc-assemble-fuzzer.cpp - Fuzzer for the MC layer ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerInterface.h"
+#include "llvm-c/Target.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+using namespace llvm;
+
+static cl::opt<std::string>  // -triple: target to assemble for; LLVMFuzzerInitialize substitutes the default target triple when empty.
+    TripleName("triple", cl::desc("Target triple to assemble for, "
+                                  "see -version for available targets"));
+
+static cl::opt<std::string>  // -mcpu: CPU name passed to createMCSubtargetInfo and createMCAsmBackend.
+    MCPU("mcpu",
+         cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+         cl::value_desc("cpu-name"), cl::init(""));
+
+// This is useful for variable-length instruction sets.
+static cl::opt<unsigned> InsnLimit(  // NOTE(review): nothing in this file reads InsnLimit -- confirm whether it is still needed here.
+    "insn-limit",
+    cl::desc("Limit the number of instructions to process (0 for no limit)"),
+    cl::value_desc("count"), cl::init(0));
+
+static cl::list<std::string>  // -mattr: comma-separated feature list, folded into FeaturesStr by LLVMFuzzerInitialize.
+    MAttrs("mattr", cl::CommaSeparated,
+           cl::desc("Target specific attributes (-mattr=help for details)"),
+           cl::value_desc("a1,+a2,-a3,..."));
+// The feature string derived from -mattr's values; written once in LLVMFuzzerInitialize, read by AssembleOneInput.
+std::string FeaturesStr;
+
+static cl::list<std::string>  // Positional arguments collected for the fuzzer driver itself.
+    FuzzerArgs("fuzzer-args", cl::Positional,
+               cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore,
+               cl::PositionalEatsArgs);
+static std::vector<char *> ModifiedArgv;  // Backing storage for the argv that LLVMFuzzerInitialize hands back to its caller.
+
+enum OutputFileType {  // Values accepted by -filetype.
+  OFT_Null,
+  OFT_AssemblyFile,
+  OFT_ObjectFile
+};
+static cl::opt<OutputFileType>  // -filetype: consulted by AssembleOneInput when it creates the output streamer; defaults to "asm".
+FileType("filetype", cl::init(OFT_AssemblyFile),
+  cl::desc("Choose an output file type:"),
+  cl::values(
+       clEnumValN(OFT_AssemblyFile, "asm",
+                  "Emit an assembly ('.s') file"),
+       clEnumValN(OFT_Null, "null",
+                  "Don't emit anything (for timing purposes)"),
+       clEnumValN(OFT_ObjectFile, "obj",
+                  "Emit a native object ('.o') file")));
+
+
+/// A MemoryBuffer view over the bytes the fuzzer hands to
+/// LLVMFuzzerTestOneInput.
+///
+/// The bytes are referenced, not copied, so the caller's storage must stay
+/// alive for as long as this object is in use.
+class LLVMFuzzerInputBuffer : public MemoryBuffer {
+public:
+  /// Wraps the \p size_ bytes starting at \p data_. The range is registered
+  /// with the last argument of init() set to false, i.e. no NUL terminator is
+  /// required after the data.
+  LLVMFuzzerInputBuffer(const uint8_t *data_, size_t size_)
+      : Data(reinterpret_cast<const char *>(data_)), Size(size_) {
+    init(Data, Data + Size, false);
+  }
+
+  /// The input is not disk-backed, so it is reported as a malloc-style buffer
+  /// (even though the fuzzer may well have obtained it from mmap or sbrk).
+  /// Marked `override` so the compiler flags any drift from the base-class
+  /// signature.
+  BufferKind getBufferKind() const override { return MemoryBuffer_Malloc; }
+
+private:
+  const char *Data;
+  size_t Size;
+};
+
+/// Runs the assembly parser over the buffers registered with \p SrcMgr and
+/// streams the result into \p Str.
+///
+/// Aborts the process (after printing a message prefixed with \p ProgName)
+/// when \p TheTarget cannot supply a target assembly parser. Otherwise
+/// returns whatever the parser's Run() reports.
+static int AssembleInput(const char *ProgName, const Target *TheTarget,
+                         SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str,
+                         MCAsmInfo &MAI, MCSubtargetInfo &STI,
+                         MCInstrInfo &MCII, MCTargetOptions &MCOptions) {
+  std::unique_ptr<MCAsmParser> GenericParser(
+      createMCAsmParser(SrcMgr, Ctx, Str, MAI));
+  std::unique_ptr<MCTargetAsmParser> TargetParser(
+      TheTarget->createMCAsmParser(STI, *GenericParser, MCII, MCOptions));
+
+  // Without a target-specific parser there is nothing to run.
+  if (TargetParser == nullptr) {
+    errs() << ProgName << ": error: this target '" << TripleName
+           << "', does not support assembly parsing.\n";
+    abort();
+  }
+
+  GenericParser->setTargetParser(*TargetParser);
+
+  // The argument is Run()'s NoInitialTextSection flag.
+  return GenericParser->Run(false);
+}
+
+
+/// Assembles one fuzzer-provided buffer for the target selected on the command
+/// line (-triple, -mcpu, -mattr) and the output kind selected by -filetype.
+///
+/// \param Data  Start of the assembly text supplied by the fuzzer. The bytes
+///              are wrapped, not copied.
+/// \param Size  Number of bytes at \p Data.
+/// \returns Always 0: the parser's result is deliberately ignored, since
+///          malformed input is the normal case when fuzzing. Failures while
+///          setting up the MC objects abort the process.
+int AssembleOneInput(const uint8_t *Data, size_t Size) {
+  const bool ShowInst = false;
+  const bool AsmVerbose = false;
+  const bool UseDwarfDirectory = true;
+
+  Triple TheTriple(Triple::normalize(TripleName));
+
+  SourceMgr SrcMgr;
+
+  std::unique_ptr<MemoryBuffer> BufferPtr(new LLVMFuzzerInputBuffer(Data, Size));
+
+  // Tell SrcMgr about this buffer, which is what the parser will pick up.
+  SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc());
+
+  static const std::vector<std::string> NoIncludeDirs;
+  SrcMgr.setIncludeDirs(NoIncludeDirs);
+
+  // ArchName is intentionally left empty, so the lookup is driven by the
+  // triple alone.
+  static std::string ArchName;
+  std::string Error;
+  const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
+      Error);
+  if (!TheTarget) {
+    errs() << "error: this target '" << TheTriple.normalize()
+      << "/" << ArchName << "', was not found: '" << Error << "'\n";
+
+    abort();
+  }
+
+  std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
+  if (!MRI) {
+    errs() << "Unable to create target register info!";
+    abort();
+  }
+
+  std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TripleName));
+  if (!MAI) {
+    errs() << "Unable to create target asm info!";
+    abort();
+  }
+
+
+  MCObjectFileInfo MOFI;
+  MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr);
+
+  static const bool UsePIC = false;
+  static const CodeModel::Model CMModel = CodeModel::Default;
+  MOFI.InitMCObjectFileInfo(TheTriple, UsePIC, CMModel, Ctx);
+
+  const unsigned OutputAsmVariant = 0;
+  std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
+
+  // Held in a unique_ptr so the printer is freed when no asm streamer takes
+  // it over (the -filetype=obj and -filetype=null paths).
+  std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
+      Triple(TripleName), OutputAsmVariant, *MAI, *MCII, *MRI));
+  if (!IP) {
+    errs()
+      << "error: unable to create instruction printer for target triple '"
+      << TheTriple.normalize() << "' with assembly variant "
+      << OutputAsmVariant << ".\n";
+
+    abort();
+  }
+
+  const char *ProgName = "llvm-mc-fuzzer";
+  std::unique_ptr<MCSubtargetInfo> STI(
+      TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
+
+  MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
+
+  // Sink for textual output; the text is collected in memory and discarded.
+  std::string OutputString;
+  raw_string_ostream Out(OutputString);
+  auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
+
+  // Sinks for object output. They are declared at function scope, and before
+  // the streamer, because the object streamer keeps a reference to the stream
+  // it is given: the sinks must still be alive while AssembleInput runs and
+  // must be destroyed only after the streamer.
+  std::unique_ptr<tool_output_file> ObjOut;
+  std::unique_ptr<buffer_ostream> BOS;
+
+  std::unique_ptr<MCStreamer> Str;
+
+  if (FileType == OFT_AssemblyFile) {
+    // Textual output is produced without a code emitter or asm backend.
+    MCCodeEmitter *CE = nullptr;
+    MCAsmBackend *MAB = nullptr;
+    Str.reset(TheTarget->createAsmStreamer(
+        Ctx,  std::move(FOut), AsmVerbose,
+        UseDwarfDirectory, IP.release(), CE, MAB, ShowInst));
+  } else if (FileType == OFT_Null) {
+    // "-filetype=null" is an accepted option value: parse, but emit nothing.
+    Str.reset(TheTarget->createNullStreamer(Ctx));
+  } else {
+    assert(FileType == OFT_ObjectFile && "Invalid file type!");
+
+    std::error_code EC;
+    const std::string OutputFilename = "-";
+    ObjOut = llvm::make_unique<tool_output_file>(OutputFilename, EC,
+                                                 sys::fs::F_None);
+    if (EC) {
+      errs() << EC.message() << '\n';
+      abort();
+    }
+
+    // Don't waste memory on names of temp labels.
+    Ctx.setUseNamesOnTempLabels(false);
+
+    // The object streamer needs a seekable stream; buffer the output when the
+    // underlying stream cannot seek.
+    raw_pwrite_stream *OS = &ObjOut->os();
+    if (!ObjOut->os().supportsSeeking()) {
+      BOS = make_unique<buffer_ostream>(ObjOut->os());
+      OS = BOS.get();
+    }
+
+    MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
+    MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, MCPU,
+                                                      MCOptions);
+    Str.reset(TheTarget->createMCObjectStreamer(
+        TheTriple, Ctx, *MAB, *OS, CE, *STI, MCOptions.MCRelaxAll,
+        MCOptions.MCIncrementalLinkerCompatible,
+        /*DWARFMustBeAtTheEnd*/ false));
+  }
+  const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI,
+      *MCII, MCOptions);
+
+  // Parse errors are expected for fuzzed input; only crashes are interesting.
+  (void) Res;
+
+  return 0;
+}
+
+/// Per-input fuzzer callback: hands the \p Size bytes at \p Data to
+/// AssembleOneInput and returns its result unchanged.
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  const int Status = AssembleOneInput(Data, Size);
+  return Status;
+}
+
+/// Start-up hook that prepares the tool before any input is run.
+///
+/// Registers all target infos, target MCs and asm parsers, parses this tool's
+/// own options, and then rewrites \p argc / \p argv so that only argv[0] and
+/// the arguments collected by -fuzzer-args remain visible to the caller.
+/// Also derives FeaturesStr from -mattr and substitutes the default target
+/// triple when -triple was not given.
+///
+/// \returns Always 0.
+int LLVMFuzzerInitialize(int *argc, char ***argv) {
+  // The command line is unusual compared to other fuzzers due to the need to
+  // specify the target. Options like -triple, -mcpu, and -mattr work like
+  // their counterparts in llvm-mc, while -fuzzer-args collects options for the
+  // fuzzer itself.
+  //
+  // Examples:
+  //
+  // Fuzz the big-endian MIPS32R6 assembler using 100,000 inputs of up to
+  // 64 bytes each and use the contents of ./corpus as the test corpus:
+  //   llvm-mc-assemble-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 \
+  //       -fuzzer-args -max_len=64 -runs=100000 ./corpus
+  //
+  // Infinitely fuzz the little-endian MIPS64R2 assembler with the MSA
+  // feature enabled:
+  //   llvm-mc-assemble-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 \
+  //       -mattr=msa -fuzzer-args ./corpus
+  //
+  // If your aim is to find instructions that are not tested, then it is
+  // advisable to constrain the maximum input size with -max_len, as in the
+  // first example, so that each input holds little more than a single
+  // instruction. Without this constraint, there will be considerable
+  // redundancy in the corpus.
+
+  char **OriginalArgv = *argv;
+
+  LLVMInitializeAllTargetInfos();
+  LLVMInitializeAllTargetMCs();
+  LLVMInitializeAllAsmParsers();
+
+  cl::ParseCommandLineOptions(*argc, OriginalArgv);
+
+  // Rebuild the argv without the arguments this tool consumed so that the
+  // driver can parse its arguments.
+  //
+  // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
+  // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
+  // non-const buffer to avoid the need to clean up when the fuzzer terminates.
+  //
+  // Each fuzzer argument is paired with exactly one OriginalArgv entry. The
+  // search resumes just past the previous match, so an argument whose text
+  // occurs several times on the command line is not forwarded once per
+  // occurrence.
+  ModifiedArgv.push_back(OriginalArgv[0]);
+  int NextCandidate = 1;
+  for (const auto &FuzzerArg : FuzzerArgs) {
+    for (int i = NextCandidate; i < *argc; ++i) {
+      if (FuzzerArg == OriginalArgv[i]) {
+        ModifiedArgv.push_back(OriginalArgv[i]);
+        NextCandidate = i + 1;
+        break;
+      }
+    }
+  }
+  *argc = static_cast<int>(ModifiedArgv.size());
+  *argv = ModifiedArgv.data();
+
+  // Package up features to be passed to target/subtarget
+  // We have to pass it via a global since the callback doesn't
+  // permit any user data.
+  if (MAttrs.size()) {
+    SubtargetFeatures Features;
+    for (unsigned i = 0; i != MAttrs.size(); ++i)
+      Features.AddFeature(MAttrs[i]);
+    FeaturesStr = Features.getString();
+  }
+
+  if (TripleName.empty())
+    TripleName = sys::getDefaultTargetTriple();
+
+  return 0;
+}

Added: llvm/trunk/tools/llvm-mc-disassemble-fuzzer/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc-disassemble-fuzzer/CMakeLists.txt?rev=296323&view=auto
==============================================================================
--- llvm/trunk/tools/llvm-mc-disassemble-fuzzer/CMakeLists.txt (added)
+++ llvm/trunk/tools/llvm-mc-disassemble-fuzzer/CMakeLists.txt Mon Feb 27 00:22:17 2017
@@ -0,0 +1,21 @@
+if( LLVM_USE_SANITIZE_COVERAGE )  # Everything below is skipped unless this variable is set.
+  include_directories(BEFORE
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../lib/Fuzzer)  # Presumably where FuzzerInterface.h lives; confirm.
+
+  set(LLVM_LINK_COMPONENTS  # Read by add_llvm_tool (defined elsewhere); presumably the LLVM libraries to link.
+      AllTargetsAsmPrinters
+      AllTargetsDescs
+      AllTargetsDisassemblers
+      AllTargetsInfos
+      MC
+      MCDisassembler
+      MCParser
+      Support
+      )
+  add_llvm_tool(llvm-mc-disassemble-fuzzer  # The tool is built from the single source file listed next.
+                llvm-mc-disassemble-fuzzer.cpp)
+
+  target_link_libraries(llvm-mc-disassemble-fuzzer  # Additionally link the LLVMFuzzer library.
+                        LLVMFuzzer
+                        )
+endif()

Added: llvm/trunk/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp?rev=296323&view=auto
==============================================================================
--- llvm/trunk/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp (added)
+++ llvm/trunk/tools/llvm-mc-disassemble-fuzzer/llvm-mc-disassemble-fuzzer.cpp Mon Feb 27 00:22:17 2017
@@ -0,0 +1,143 @@
+//===--- llvm-mc-disassemble-fuzzer.cpp - Fuzzer for the MC layer ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "FuzzerInterface.h"
+#include "llvm-c/Disassembler.h"
+#include "llvm-c/Target.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+// Size, in bytes, of the scratch buffer that receives the text of each
+// disassembled instruction.
+const unsigned AssemblyTextBufSize = 80;
+
+// -triple: target to disassemble for. LLVMFuzzerInitialize substitutes the
+// default target triple when this is left empty.
+static cl::opt<std::string>
+    TripleName("triple", cl::desc("Target triple to disassemble for, "
+                                  "see -version for available targets"));
+
+// -mcpu: CPU name passed to the disassembler context.
+static cl::opt<std::string>
+    MCPU("mcpu",
+         cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+         cl::value_desc("cpu-name"), cl::init(""));
+
+// This is useful for variable-length instruction sets.
+static cl::opt<unsigned> InsnLimit(
+    "insn-limit",
+    cl::desc("Limit the number of instructions to process (0 for no limit)"),
+    cl::value_desc("count"), cl::init(0));
+
+// -mattr: comma-separated feature list, folded into FeaturesStr by
+// LLVMFuzzerInitialize.
+static cl::list<std::string>
+    MAttrs("mattr", cl::CommaSeparated,
+           cl::desc("Target specific attributes (-mattr=help for details)"),
+           cl::value_desc("a1,+a2,-a3,..."));
+// The feature string derived from -mattr's values.
+std::string FeaturesStr;
+
+// Positional arguments collected for the fuzzer driver itself.
+static cl::list<std::string>
+    FuzzerArgs("fuzzer-args", cl::Positional,
+               cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore,
+               cl::PositionalEatsArgs);
+// Backing storage for the argv that LLVMFuzzerInitialize hands back to its
+// caller.
+static std::vector<char *> ModifiedArgv;
+
+/// Disassembles the fuzzer-provided bytes one instruction at a time for the
+/// target selected by -triple, -mcpu and -mattr.
+///
+/// Decoding stops as soon as the disassembler consumes no bytes, or once
+/// -insn-limit instructions have been processed (a limit of 0 means no
+/// limit). The produced text is discarded.
+///
+/// \param Data  Start of the bytes to decode.
+/// \param Size  Number of bytes at \p Data.
+/// \returns Always 0.
+int DisassembleOneInput(const uint8_t *Data, size_t Size) {
+  char AssemblyText[AssemblyTextBufSize];
+
+  // LLVMDisasmInstruction is handed a non-const pointer, so decode from a
+  // private copy rather than from the fuzzer's buffer.
+  std::vector<uint8_t> DataCopy(Data, Data + Size);
+
+  LLVMDisasmContextRef Ctx = LLVMCreateDisasmCPUFeatures(
+      TripleName.c_str(), MCPU.c_str(), FeaturesStr.c_str(), nullptr, 0,
+      nullptr, nullptr);
+  assert(Ctx);
+  uint8_t *p = DataCopy.data();
+  unsigned Consumed;
+  unsigned InstructionsProcessed = 0;
+  do {
+    Consumed = LLVMDisasmInstruction(Ctx, p, Size, 0, AssemblyText,
+                                     AssemblyTextBufSize);
+    Size -= Consumed;
+    p += Consumed;
+
+    // Stop once the requested number of instructions has been reached. The
+    // comparison must be >=: with < the loop would quit after the very first
+    // instruction for any limit above 1 and never honour a limit of 1.
+    ++InstructionsProcessed;
+    if (InsnLimit != 0 && InstructionsProcessed >= InsnLimit)
+      break;
+  } while (Consumed != 0);
+  LLVMDisasmDispose(Ctx);
+  return 0;
+}
+
+/// Per-input fuzzer callback: hands the \p Size bytes at \p Data to
+/// DisassembleOneInput and returns its result unchanged.
+int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
+  const int Status = DisassembleOneInput(Data, Size);
+  return Status;
+}
+
+/// Start-up hook that prepares the tool before any input is run.
+///
+/// Registers all target infos, target MCs and disassemblers, parses this
+/// tool's own options, and then rewrites \p argc / \p argv so that only
+/// argv[0] and the arguments collected by -fuzzer-args remain visible to the
+/// caller. Also derives FeaturesStr from -mattr and substitutes the default
+/// target triple when -triple was not given.
+///
+/// \returns Always 0.
+int LLVMFuzzerInitialize(int *argc, char ***argv) {
+  // The command line is unusual compared to other fuzzers due to the need to
+  // specify the target. Options like -triple, -mcpu, and -mattr work like
+  // their counterparts in llvm-mc, while -fuzzer-args collects options for the
+  // fuzzer itself.
+  //
+  // Examples:
+  //
+  // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
+  // 4-bytes each and use the contents of ./corpus as the test corpus:
+  //   llvm-mc-disassemble-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 \
+  //       -fuzzer-args -max_len=4 -runs=100000 ./corpus
+  //
+  // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
+  // feature enabled using up to 64-byte inputs:
+  //   llvm-mc-disassemble-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 \
+  //       -mattr=msa -fuzzer-args ./corpus
+  //
+  // If your aim is to find instructions that are not tested, then it is
+  // advisable to constrain the maximum input size to a single instruction
+  // using -max_len as in the first example. This results in a test corpus of
+  // individual instructions that test unique paths. Without this constraint,
+  // there will be considerable redundancy in the corpus.
+
+  char **OriginalArgv = *argv;
+
+  LLVMInitializeAllTargetInfos();
+  LLVMInitializeAllTargetMCs();
+  LLVMInitializeAllDisassemblers();
+
+  cl::ParseCommandLineOptions(*argc, OriginalArgv);
+
+  // Rebuild the argv without the arguments this tool consumed so that the
+  // driver can parse its arguments.
+  //
+  // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
+  // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
+  // non-const buffer to avoid the need to clean up when the fuzzer terminates.
+  //
+  // Each fuzzer argument is paired with exactly one OriginalArgv entry. The
+  // search resumes just past the previous match, so an argument whose text
+  // occurs several times on the command line is not forwarded once per
+  // occurrence.
+  ModifiedArgv.push_back(OriginalArgv[0]);
+  int NextCandidate = 1;
+  for (const auto &FuzzerArg : FuzzerArgs) {
+    for (int i = NextCandidate; i < *argc; ++i) {
+      if (FuzzerArg == OriginalArgv[i]) {
+        ModifiedArgv.push_back(OriginalArgv[i]);
+        NextCandidate = i + 1;
+        break;
+      }
+    }
+  }
+  *argc = static_cast<int>(ModifiedArgv.size());
+  *argv = ModifiedArgv.data();
+
+  // Package up features to be passed to target/subtarget
+  // We have to pass it via a global since the callback doesn't
+  // permit any user data.
+  if (MAttrs.size()) {
+    SubtargetFeatures Features;
+    for (unsigned i = 0; i != MAttrs.size(); ++i)
+      Features.AddFeature(MAttrs[i]);
+    FeaturesStr = Features.getString();
+  }
+
+  if (TripleName.empty())
+    TripleName = sys::getDefaultTargetTriple();
+
+  return 0;
+}




More information about the llvm-commits mailing list