[llvm-branch-commits] [llvm] [BOLT] Add binary introspection/JIT manager (PR #81346)

Rafael Auler via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Feb 9 19:54:52 PST 2024


https://github.com/rafaelauler created https://github.com/llvm/llvm-project/pull/81346

Add a class that allows a process to introspect or investigate itself by disassembling its memory contents just-in-time with BOLT. An example is shown in a new unittest binary.

This leverages the new ability to use BOLT as a library instead of as a regular executable that processes input binaries, demonstrating how to use BOLT as a library.

>From 1167a59a185cbc113d6ca30f223e09be1abc8494 Mon Sep 17 00:00:00 2001
From: Rafael Auler <rafaelauler at fb.com>
Date: Thu, 8 Feb 2024 19:57:14 -0800
Subject: [PATCH] [BOLT] Add binary introspection/JIT manager

Add a class that allows a process to introspect or investigate itself
by disassembling its memory contents just-in-time with BOLT. An
example is shown in a new unittest binary.

This leverages the new ability to use BOLT as a library instead of as
a regular executable that processes input binaries, demonstrating how
to use BOLT as a library.
---
 bolt/include/bolt/Core/BinaryContext.h        |   9 +-
 bolt/include/bolt/Core/BinaryFunction.h       |   1 +
 bolt/include/bolt/Core/BinarySection.h        |   3 +-
 .../include/bolt/Rewrite/JITRewriteInstance.h | 105 +++++
 bolt/lib/Core/BinaryContext.cpp               |  39 +-
 bolt/lib/Rewrite/CMakeLists.txt               |   1 +
 bolt/lib/Rewrite/DWARFRewriter.cpp            |   2 +-
 bolt/lib/Rewrite/JITRewriteInstance.cpp       | 367 ++++++++++++++++++
 bolt/lib/Rewrite/MachORewriteInstance.cpp     |  35 +-
 bolt/lib/Rewrite/RewriteInstance.cpp          |  19 +-
 bolt/unittests/CMakeLists.txt                 |   1 +
 bolt/unittests/Core/BinaryContext.cpp         |   4 +-
 bolt/unittests/Core/MCPlusBuilder.cpp         |   4 +-
 bolt/unittests/Rewrite/CMakeLists.txt         |  27 ++
 bolt/unittests/Rewrite/JITRewriteInstance.cpp |  99 +++++
 15 files changed, 654 insertions(+), 62 deletions(-)
 create mode 100644 bolt/include/bolt/Rewrite/JITRewriteInstance.h
 create mode 100644 bolt/lib/Rewrite/JITRewriteInstance.cpp
 create mode 100644 bolt/unittests/Rewrite/CMakeLists.txt
 create mode 100644 bolt/unittests/Rewrite/JITRewriteInstance.cpp

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 30336c4e3a74fe..e5af8685d9b0d0 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -212,9 +212,6 @@ class BinaryContext {
   /// input file to internal section representation.
   DenseMap<SectionRef, BinarySection *> SectionRefToBinarySection;
 
-  /// Low level section registration.
-  BinarySection &registerSection(BinarySection *Section);
-
   /// Store all functions in the binary, sorted by original address.
   std::map<uint64_t, BinaryFunction> BinaryFunctions;
 
@@ -265,7 +262,8 @@ class BinaryContext {
 
 public:
   static Expected<std::unique_ptr<BinaryContext>>
-  createBinaryContext(const ObjectFile *File, bool IsPIC,
+  createBinaryContext(Triple TheTriple, StringRef InputFileName,
+                      SubtargetFeatures *Features, bool IsPIC,
                       std::unique_ptr<DWARFContext> DwCtx,
                       JournalingStreams Logger);
 
@@ -1049,6 +1047,9 @@ class BinaryContext {
   BinarySection &registerSection(const Twine &SectionName,
                                  const BinarySection &OriginalSection);
 
+  /// Low level section registration.
+  BinarySection &registerSection(BinarySection *Section);
+
   /// Register or update the information for the section with the given
   /// /p Name.  If the section already exists, the information in the
   /// section will be updated with the new data.
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index a177178769e456..68c626d9a2f452 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -655,6 +655,7 @@ class BinaryFunction {
   BinaryFunction(const BinaryFunction &) = delete;
 
   friend class MachORewriteInstance;
+  friend class JITRewriteInstance;
   friend class RewriteInstance;
   friend class BinaryContext;
   friend class DataReader;
diff --git a/bolt/include/bolt/Core/BinarySection.h b/bolt/include/bolt/Core/BinarySection.h
index a85dbf28950e31..d4f9b5955b2029 100644
--- a/bolt/include/bolt/Core/BinarySection.h
+++ b/bolt/include/bolt/Core/BinarySection.h
@@ -50,7 +50,7 @@ class BinarySection {
   std::string Name;            // Section name
   const SectionRef Section;    // SectionRef for input binary sections.
   StringRef Contents;          // Input section contents
-  const uint64_t Address;      // Address of section in input binary (may be 0)
+  uint64_t Address;            // Address of section in input binary (may be 0)
   const uint64_t Size;         // Input section size
   uint64_t InputFileOffset{0}; // Offset in the input binary
   unsigned Alignment;          // alignment in bytes (must be > 0)
@@ -461,6 +461,7 @@ class BinarySection {
   uint32_t getIndex() const { return Index; }
 
   // mutation
+  void setAddress(uint64_t Address) { this->Address = Address; }
   void setOutputAddress(uint64_t Address) { OutputAddress = Address; }
   void setOutputFileOffset(uint64_t Offset) { OutputFileOffset = Offset; }
   void setSectionID(StringRef ID) {
diff --git a/bolt/include/bolt/Rewrite/JITRewriteInstance.h b/bolt/include/bolt/Rewrite/JITRewriteInstance.h
new file mode 100644
index 00000000000000..9833845deaa3d6
--- /dev/null
+++ b/bolt/include/bolt/Rewrite/JITRewriteInstance.h
@@ -0,0 +1,105 @@
+//===- bolt/Rewrite/JITRewriteInstance.h - in-memory rewriter ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to control BOLT as a JIT library
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_JIT_REWRITE_REWRITE_INSTANCE_H
+#define BOLT_JIT_REWRITE_REWRITE_INSTANCE_H
+
+#include "bolt/Utils/NameResolver.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/StringSaver.h"
+#include <memory>
+
+namespace llvm {
+
+namespace object {
+class ObjectFile;
+}
+
+namespace bolt {
+
+class BinaryContext;
+class ProfileReaderBase;
+struct JournalingStreams;
+
+/// Allows a process to introspect itself by running BOLT to disassemble its
+/// own address space.
+class JITRewriteInstance {
+  std::unique_ptr<BinaryContext> BC;
+  NameResolver NR;
+  BumpPtrAllocator StrAllocator; // Must be declared before StrPool.
+  StringSaver StrPool;
+  std::unique_ptr<ProfileReaderBase> ProfileReader;
+
+  void adjustCommandLineOptions();
+  Error preprocessProfileData();
+  Error processProfileDataPreCFG();
+  Error processProfileData();
+  Error disassembleFunctions();
+  Error buildFunctionsCFG();
+  void postProcessFunctions();
+  JITRewriteInstance(JournalingStreams Logger, bool IsPIC, Error &Err);
+
+public:
+  /// Create BOLT data structures/interface to deal with disassembly. Logger
+  /// contains the streams used for BOLT to report events (regular or errors)
+  /// that might happen while BOLT is trying to reconstruct a function from
+  /// binary level.
+  static Expected<std::unique_ptr<JITRewriteInstance>>
+  createJITRewriteInstance(JournalingStreams Logger, bool IsPIC);
+  ~JITRewriteInstance();
+
+  /// This is the main entry point used to make BOLT aware of a fragment of
+  /// memory space in the process. The user might need to reconstruct the
+  /// original ELF type/flags, such as using type SHT_PROGBITS together with
+  /// flags SHF_ALLOC | SHF_EXECINSTR to describe an allocatable section
+  /// containing code.
+  void registerJITSection(StringRef Name, uint64_t Address, StringRef Data,
+                          unsigned Alignment, unsigned ELFType,
+                          unsigned ELFFlags);
+
+  /// Communicate to BOLT the boundaries of a function in a section of memory
+  /// previously registered with registerJITSection.
+  void registerJITFunction(StringRef Name, uintptr_t Addr, size_t Size);
+
+  /// In case the user is using LLVM as an in-process JIT, and the user has
+  /// access to the ObjectFile instance loaded in memory, instead of using
+  /// the registerJITSection/registerJITFunction pair, the user can just
+  /// forward that object here and JITRewriteInstance will read this object
+  /// and register its sections and functions the appropriate number of times
+  /// to map this object to BOLT.
+  Error notifyObjectLoaded(const object::ObjectFile &Obj);
+
+  /// Mark all functions added so far as non-simple, so BOLT will skip them.
+  void disableAllFunctions();
+
+  /// Mark a specific function as simple, so BOLT will try to disassemble it.
+  void processFunctionContaining(uint64_t Address);
+
+  /// Supply a profile file for BOLT to attach edge counts to the disassembled
+  /// functions.
+  Error setProfile(StringRef FileName);
+
+  /// Run all the necessary steps to disassemble registered sections and
+  /// functions (process what we have so far).
+  Error run();
+
+  /// Print all BOLT's processed functions
+  void printAll(raw_ostream &OS);
+
+  /// Print a specific function processed by BOLT
+  void printFunctionContaining(raw_ostream &OS, uint64_t Address);
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index d544ece13a832f..9a2780e8f3cecc 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -162,28 +162,30 @@ BinaryContext::~BinaryContext() {
 
 /// Create BinaryContext for a given architecture \p ArchName and
 /// triple \p TripleName.
-Expected<std::unique_ptr<BinaryContext>>
-BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
-                                   std::unique_ptr<DWARFContext> DwCtx,
-                                   JournalingStreams Logger) {
+Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
+    Triple TheTriple, StringRef InputFileName, SubtargetFeatures *Features,
+    bool IsPIC, std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) {
   StringRef ArchName = "";
   std::string FeaturesStr = "";
-  switch (File->getArch()) {
+  switch (TheTriple.getArch()) {
   case llvm::Triple::x86_64:
+    if (Features)
+      return createFatalBOLTError(
+          "x86_64 target does not use SubtargetFeatures");
     ArchName = "x86-64";
     FeaturesStr = "+nopl";
     break;
   case llvm::Triple::aarch64:
+    if (Features)
+      return createFatalBOLTError(
+          "AArch64 target does not use SubtargetFeatures");
     ArchName = "aarch64";
     FeaturesStr = "+all";
     break;
   case llvm::Triple::riscv64: {
     ArchName = "riscv64";
-    Expected<SubtargetFeatures> Features = File->getFeatures();
-
-    if (auto E = Features.takeError())
-      return std::move(E);
-
+    if (!Features)
+      return createFatalBOLTError("RISCV target needs SubtargetFeatures");
     // We rely on relaxation for some transformations (e.g., promoting all calls
     // to PseudoCALL and then making JITLink relax them). Since the relax
     // feature is not stored in the object file, we manually enable it.
@@ -196,12 +198,11 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
                              "BOLT-ERROR: Unrecognized machine in ELF file");
   }
 
-  auto TheTriple = std::make_unique<Triple>(File->makeTriple());
-  const std::string TripleName = TheTriple->str();
+  const std::string TripleName = TheTriple.str();
 
   std::string Error;
   const Target *TheTarget =
-      TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
+      TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error);
   if (!TheTarget)
     return createStringError(make_error_code(std::errc::not_supported),
                              Twine("BOLT-ERROR: ", Error));
@@ -240,13 +241,13 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
         Twine("BOLT-ERROR: no instruction info for target ", TripleName));
 
   std::unique_ptr<MCContext> Ctx(
-      new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
+      new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
   std::unique_ptr<MCObjectFileInfo> MOFI(
       TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
   Ctx->setObjectFileInfo(MOFI.get());
   // We do not support X86 Large code model. Change this in the future.
   bool Large = false;
-  if (TheTriple->getArch() == llvm::Triple::aarch64)
+  if (TheTriple.getArch() == llvm::Triple::aarch64)
     Large = true;
   unsigned LSDAEncoding =
       Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
@@ -273,7 +274,7 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
 
   int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
   std::unique_ptr<MCInstPrinter> InstructionPrinter(
-      TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
+      TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo,
                                      *MII, *MRI));
   if (!InstructionPrinter)
     return createStringError(
@@ -285,8 +286,8 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
       TheTarget->createMCCodeEmitter(*MII, *Ctx));
 
   auto BC = std::make_unique<BinaryContext>(
-      std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
-      std::string(TripleName), std::move(MCE), std::move(MOFI),
+      std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple),
+      TheTarget, std::string(TripleName), std::move(MCE), std::move(MOFI),
       std::move(AsmInfo), std::move(MII), std::move(STI),
       std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
       std::move(DisAsm), Logger);
@@ -296,7 +297,7 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
   BC->MAB = std::unique_ptr<MCAsmBackend>(
       BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
 
-  BC->setFilename(File->getFileName());
+  BC->setFilename(InputFileName);
 
   BC->HasFixedLoadAddress = !IsPIC;
 
diff --git a/bolt/lib/Rewrite/CMakeLists.txt b/bolt/lib/Rewrite/CMakeLists.txt
index 6890f52e2b28bb..0bb80fbeef277e 100644
--- a/bolt/lib/Rewrite/CMakeLists.txt
+++ b/bolt/lib/Rewrite/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_library(LLVMBOLTRewrite
   DWARFRewriter.cpp
   ExecutableFileMemoryManager.cpp
   JITLinkLinker.cpp
+  JITRewriteInstance.cpp
   LinuxKernelRewriter.cpp
   MachORewriteInstance.cpp
   MetadataManager.cpp
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index 27fa937c7508c3..6fb9e9fafd0e38 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -1652,7 +1652,7 @@ namespace {
 std::unique_ptr<BinaryContext>
 createDwarfOnlyBC(const object::ObjectFile &File) {
   return cantFail(BinaryContext::createBinaryContext(
-      &File, false,
+      File.makeTriple(), File.getFileName(), nullptr, false,
       DWARFContext::create(File, DWARFContext::ProcessDebugRelocations::Ignore,
                            nullptr, "", WithColor::defaultErrorHandler,
                            WithColor::defaultWarningHandler),
diff --git a/bolt/lib/Rewrite/JITRewriteInstance.cpp b/bolt/lib/Rewrite/JITRewriteInstance.cpp
new file mode 100644
index 00000000000000..51537d261f0359
--- /dev/null
+++ b/bolt/lib/Rewrite/JITRewriteInstance.cpp
@@ -0,0 +1,367 @@
+//===- bolt/Rewrite/JITRewriteInstance.cpp - JIT rewriter -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Rewrite/JITRewriteInstance.h"
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Core/BinaryEmitter.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/JumpTable.h"
+#include "bolt/Core/MCPlusBuilder.h"
+#include "bolt/Profile/DataAggregator.h"
+#include "bolt/Rewrite/BinaryPassManager.h"
+#include "bolt/Rewrite/RewriteInstance.h"
+#include "bolt/Utils/Utils.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/FileSystem.h"
+#include <memory>
+
+namespace opts {
+
+using namespace llvm;
+extern cl::opt<unsigned> AlignText;
+extern cl::opt<bool> PrintSections;
+extern cl::opt<bool> PrintDisasm;
+extern cl::opt<bool> PrintCFG;
+extern cl::opt<unsigned> Verbosity;
+} // namespace opts
+
+namespace llvm {
+namespace bolt {
+
+#define DEBUG_TYPE "bolt"
+
+Expected<std::unique_ptr<JITRewriteInstance>>
+JITRewriteInstance::createJITRewriteInstance(JournalingStreams Logger,
+                                             bool IsPIC) {
+  Error CtorErr = Error::success(); // Set by the constructor on failure.
+  std::unique_ptr<JITRewriteInstance> Instance(
+      new JITRewriteInstance(Logger, IsPIC, CtorErr));
+  if (!CtorErr)
+    return std::move(Instance);
+  return std::move(CtorErr);
+}
+
+JITRewriteInstance::JITRewriteInstance(JournalingStreams Logger, bool IsPIC,
+                                       Error &Err)
+    : StrPool(StrAllocator) {
+  ErrorAsOutParameter EAO(&Err);
+  // Use LLVM's default target triple; no features or DWARF context are passed.
+  Triple TheTriple(sys::getDefaultTargetTriple().c_str());
+  auto ContextOrErr = BinaryContext::createBinaryContext(
+      TheTriple, StringRef("JIT input file"), nullptr, IsPIC, nullptr, Logger);
+  if (Error CreateErr = ContextOrErr.takeError()) {
+    Err = std::move(CreateErr);
+    return;
+  }
+  BC = std::move(*ContextOrErr);
+  BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(
+      createMCPlusBuilder(BC->TheTriple->getArch(), BC->MIA.get(),
+                          BC->MII.get(), BC->MRI.get(), BC->STI.get())));
+  BC->FirstAllocAddress = 0;
+  BC->LayoutStartAddress = 0xffffffffffffffff;
+}
+
+JITRewriteInstance::~JITRewriteInstance() = default;
+
+void JITRewriteInstance::adjustCommandLineOptions() {
+  if (opts::AlignText.getNumOccurrences() == 0) // Not given on command line.
+    opts::AlignText = BC->PageAlign;
+}
+
+Error JITRewriteInstance::preprocessProfileData() {
+  // No reader means no profile was supplied (see setProfile): nothing to do.
+  if (ProfileReader == nullptr)
+    return Error::success();
+
+  return ProfileReader->preprocessProfile(*BC);
+}
+
+Error JITRewriteInstance::processProfileDataPreCFG() {
+  // Without a profile reader there is no pre-CFG profile data to read.
+  if (ProfileReader == nullptr)
+    return Error::success();
+
+  return ProfileReader->readProfilePreCFG(*BC);
+}
+
+Error JITRewriteInstance::processProfileData() {
+  // Without a profile reader there is no profile to attach.
+  if (ProfileReader == nullptr)
+    return Error::success();
+
+  return ProfileReader->readProfile(*BC);
+}
+
+Error JITRewriteInstance::disassembleFunctions() {
+  for (auto &AddrAndFunc : BC->getBinaryFunctions()) {
+    BinaryFunction &BF = AddrAndFunc.second;
+    if (BF.isSimple()) { // Functions not marked simple are left untouched.
+      if (Error DisasmErr = BF.disassemble())
+        return Error(std::move(DisasmErr)); // Stop at the first failure.
+      if (opts::PrintDisasm)
+        BF.print(BC->outs(), "after disassembly");
+    }
+  }
+  return Error::success();
+}
+
+Error JITRewriteInstance::buildFunctionsCFG() {
+  // Fatal BOLT errors propagate; others are logged (if non-empty) and dropped.
+  auto FilterError = [&](const BOLTError &E) -> Error {
+    if (E.isFatal())
+      return Error(std::make_unique<BOLTError>(std::move(E)));
+    if (!E.getMessage().empty())
+      E.log(BC->errs());
+    return Error::success();
+  };
+  for (auto &AddrAndFunc : BC->getBinaryFunctions()) {
+    BinaryFunction &BF = AddrAndFunc.second;
+    if (!BF.isSimple())
+      continue;
+    if (Error CFGErr = handleErrors(BF.buildCFG(/*AllocId*/ 0), FilterError))
+      return Error(std::move(CFGErr));
+  }
+  return Error::success();
+}
+
+void JITRewriteInstance::postProcessFunctions() {
+  for (auto &BFI : BC->getBinaryFunctions()) {
+    BinaryFunction &Function = BFI.second;
+    if (Function.empty() || !Function.isSimple())
+      continue; // Skip functions that are empty or not marked simple.
+    Function.postProcessCFG();
+    if (opts::PrintCFG) // Use BOLT's journaling stream, like PrintDisasm does.
+      Function.print(BC->outs(), "after building cfg");
+  }
+}
+
+void JITRewriteInstance::registerJITSection(StringRef Name, uint64_t Address,
+                                            StringRef Data, unsigned Alignment,
+                                            unsigned ELFType,
+                                            unsigned ELFFlags) {
+  uint8_t *Bytes = const_cast<uint8_t *>(Data.bytes_begin());
+  auto *JITSec = new BinarySection(*BC, Name, Bytes, Data.size(), Alignment,
+                                   ELFType, ELFFlags);
+  JITSec->setAddress(Address); // The constructor call above takes no address.
+  BC->registerSection(JITSec);
+}
+
+void JITRewriteInstance::registerJITFunction(StringRef Name, uintptr_t Addr,
+                                             size_t Size) {
+  if (ErrorOr<BinarySection &> Owner = BC->getSectionForAddress(Addr))
+    BC->createBinaryFunction(std::string(Name), *Owner, Addr, Size);
+}
+
+// Register the sections and function symbols of \p Obj, reading section bytes
+// straight from this process' memory at each section's address.
+Error JITRewriteInstance::notifyObjectLoaded(const object::ObjectFile &Obj) {
+  for (const object::SectionRef &Section : Obj.sections()) {
+    Expected<StringRef> SectionName = Section.getName();
+    if (Error E = SectionName.takeError())
+      return Error(std::move(E));
+    // Only register sections with names.
+    if (SectionName->empty())
+      continue;
+
+    StringRef UniqueSectionName = StrPool.save(NR.uniquify(*SectionName));
+    unsigned ELFType = ELFSectionRef(Section).getType();
+    unsigned ELFFlags = ELFSectionRef(Section).getFlags();
+    if (ELFType == ELF::SHT_NOBITS)
+      continue;
+
+    const uint64_t Address = Section.getAddress();
+    const uint64_t Size = Section.getSize();
+    StringRef Contents =
+        StringRef(reinterpret_cast<const char *>(Address), Size);
+    if (Contents.empty())
+      continue;
+
+    this->registerJITSection(UniqueSectionName, Section.getAddress(), Contents,
+                             Section.getAlignment().value(), ELFType, ELFFlags);
+    LLVM_DEBUG(
+        dbgs() << "BOLT-DEBUG: registering section " << *SectionName << " @ 0x"
+               << Twine::utohexstr(Section.getAddress()) << ":0x"
+               << Twine::utohexstr(Section.getAddress() + Section.getSize())
+               << "\n");
+  }
+
+  if (opts::PrintSections) {
+    BC->outs() << "BOLT-INFO: Sections from original binary:\n";
+    BC->printSections(BC->outs());
+  }
+
+  std::vector<SymbolRef> FunctionSymbols;
+  for (const SymbolRef &S : Obj.symbols()) {
+    auto TypeOrErr = S.getType();
+    if (Error E = TypeOrErr.takeError())
+      return Error(std::move(E));
+    SymbolRef::Type Type = *TypeOrErr;
+    if (Type == SymbolRef::ST_Function)
+      FunctionSymbols.push_back(S);
+  }
+
+  if (FunctionSymbols.empty())
+    return Error::success();
+
+  Error SortErrors = Error::success();
+  llvm::stable_sort(FunctionSymbols, [&](const SymbolRef &LHS,
+                                         const SymbolRef &RHS) {
+    auto LHSAddrOrErr = LHS.getAddress();
+    auto RHSAddrOrErr = RHS.getAddress();
+    if (auto E =
+            joinErrors(LHSAddrOrErr.takeError(), RHSAddrOrErr.takeError())) {
+      SortErrors = joinErrors(std::move(SortErrors), std::move(E));
+      return false;
+    }
+    return *LHSAddrOrErr < *RHSAddrOrErr;
+  });
+  if (SortErrors)
+    return Error(std::move(SortErrors));
+
+  for (size_t Index = 0; Index < FunctionSymbols.size(); ++Index) {
+    auto AddrOrErr = FunctionSymbols[Index].getAddress();
+    if (auto E = AddrOrErr.takeError())
+      return Error(std::move(E));
+
+    const uint64_t Address = *AddrOrErr;
+    ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
+    if (!Section)
+      continue;
+
+    auto NameOrErr = FunctionSymbols[Index].getName();
+    auto FlagsOrErr = FunctionSymbols[Index].getFlags();
+    auto SecOrErr = FunctionSymbols[Index].getSection();
+    if (auto E = joinErrors(
+            joinErrors(NameOrErr.takeError(), FlagsOrErr.takeError()),
+            SecOrErr.takeError()))
+      return Error(std::move(E));
+    std::string SymbolName = NameOrErr->str();
+    // Uniquify names of local symbols.
+    if (!(*FlagsOrErr & SymbolRef::SF_Global))
+      SymbolName = NR.uniquify(SymbolName);
+
+    section_iterator S = *SecOrErr;
+    uint64_t EndAddress = S->getAddress() + S->getSize();
+
+    // Find the next symbol at a different address, skipping aliases. Every
+    // access is bounds-checked: the last symbol has no successor.
+    size_t NFIndex = Index + 1;
+    uint64_t NextAddr = Address;
+    for (; NFIndex < FunctionSymbols.size(); ++NFIndex) {
+      auto NFAddrOrErr = FunctionSymbols[NFIndex].getAddress();
+      if (auto E = NFAddrOrErr.takeError())
+        return Error(std::move(E));
+      NextAddr = *NFAddrOrErr;
+      if (NextAddr != Address)
+        break;
+    }
+
+    // The next function bounds this one only if it is in the same section.
+    if (NFIndex < FunctionSymbols.size()) {
+      auto NFSecOrErr = FunctionSymbols[NFIndex].getSection();
+      if (auto E = NFSecOrErr.takeError())
+        return Error(std::move(E));
+      if (S == *NFSecOrErr)
+        EndAddress = NextAddr;
+    }
+
+    const uint64_t SymbolSize = EndAddress - Address;
+    const auto It = BC->getBinaryFunctions().find(Address);
+    if (It == BC->getBinaryFunctions().end()) {
+      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating binary function for "
+                        << SymbolName << "\n");
+      BC->createBinaryFunction(std::move(SymbolName), *Section, Address,
+                               SymbolSize);
+    } else {
+      It->second.addAlternativeName(std::move(SymbolName));
+    }
+  }
+
+  for (auto &BFI : BC->getBinaryFunctions()) {
+    BinaryFunction &Function = BFI.second;
+    Function.setMaxSize(Function.getSize());
+
+    ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData();
+    if (!FunctionData) {
+      BC->errs() << "BOLT-ERROR: corresponding section is non-executable or "
+                 << "empty for function " << Function << '\n';
+      continue;
+    }
+
+    if (Function.getSize() == 0)
+      Function.setSimple(false);
+  }
+
+  return Error::success();
+}
+
+void JITRewriteInstance::disableAllFunctions() {
+  // Clear the "simple" flag on every function known so far; callers re-enable
+  // individual ones with processFunctionContaining().
+  for (auto &AddrAndFunc : BC->getBinaryFunctions())
+    AddrAndFunc.second.setSimple(false);
+}
+
+void JITRewriteInstance::processFunctionContaining(uint64_t Address) {
+  if (BinaryFunction *BF = BC->getBinaryFunctionContainingAddress(Address))
+    BF->setSimple(true); // No-op when no function contains Address.
+}
+
+Error JITRewriteInstance::setProfile(StringRef Filename) {
+  if (sys::fs::exists(Filename)) {
+    ProfileReader = std::make_unique<DataAggregator>(Filename);
+    return Error::success();
+  } // A missing file is rejected before any reader is created.
+  return errorCodeToError(make_error_code(errc::no_such_file_or_directory));
+}
+
+Error JITRewriteInstance::run() {
+  adjustCommandLineOptions();
+
+  // Fallible stages, listed in execution order. Profile handling is
+  // interleaved with disassembly and CFG construction, and each profile
+  // stage is a no-op when no profile reader was installed.
+  using Stage = Error (JITRewriteInstance::*)();
+  const Stage Stages[] = {
+      &JITRewriteInstance::preprocessProfileData,
+      &JITRewriteInstance::disassembleFunctions,
+      &JITRewriteInstance::processProfileDataPreCFG,
+      &JITRewriteInstance::buildFunctionsCFG,
+      &JITRewriteInstance::processProfileData,
+  };
+  for (Stage RunStage : Stages)
+    if (Error StageErr = (this->*RunStage)())
+      return Error(std::move(StageErr)); // The first failure ends the run.
+
+  postProcessFunctions();
+
+  return Error::success();
+}
+
+void JITRewriteInstance::printAll(raw_ostream &OS) {
+  // Functions for which empty() holds are left out of the dump.
+  for (auto &AddrAndFunc : BC->getBinaryFunctions()) {
+    BinaryFunction &BF = AddrAndFunc.second;
+    if (!BF.empty())
+      BF.print(OS, "after building cfg");
+  }
+}
+
+void JITRewriteInstance::printFunctionContaining(raw_ostream &OS,
+                                                 uint64_t Address) {
+  if (BinaryFunction *BF = BC->getBinaryFunctionContainingAddress(Address)) {
+    OS << formatv("Printing function containg address {0:x}\n", Address);
+    BF->print(OS, "JIT on-demand inspection");
+  } // Otherwise no function contains Address and nothing is printed.
+}
+
+} // namespace bolt
+} // namespace llvm
diff --git a/bolt/lib/Rewrite/MachORewriteInstance.cpp b/bolt/lib/Rewrite/MachORewriteInstance.cpp
index 0970a0507ebe88..172cb640bf911a 100644
--- a/bolt/lib/Rewrite/MachORewriteInstance.cpp
+++ b/bolt/lib/Rewrite/MachORewriteInstance.cpp
@@ -18,6 +18,7 @@
 #include "bolt/Rewrite/BinaryPassManager.h"
 #include "bolt/Rewrite/ExecutableFileMemoryManager.h"
 #include "bolt/Rewrite/JITLinkLinker.h"
+#include "bolt/Rewrite/RewriteInstance.h"
 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
 #include "bolt/Utils/Utils.h"
 #include "llvm/MC/MCObjectStreamer.h"
@@ -54,37 +55,6 @@ extern cl::opt<unsigned> Verbosity;
 namespace llvm {
 namespace bolt {
 
-extern MCPlusBuilder *createX86MCPlusBuilder(const MCInstrAnalysis *,
-                                             const MCInstrInfo *,
-                                             const MCRegisterInfo *,
-                                             const MCSubtargetInfo *);
-extern MCPlusBuilder *createAArch64MCPlusBuilder(const MCInstrAnalysis *,
-                                                 const MCInstrInfo *,
-                                                 const MCRegisterInfo *,
-                                                 const MCSubtargetInfo *);
-
-namespace {
-
-MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch,
-                                   const MCInstrAnalysis *Analysis,
-                                   const MCInstrInfo *Info,
-                                   const MCRegisterInfo *RegInfo,
-                                   const MCSubtargetInfo *STI) {
-#ifdef X86_AVAILABLE
-  if (Arch == Triple::x86_64)
-    return createX86MCPlusBuilder(Analysis, Info, RegInfo, STI);
-#endif
-
-#ifdef AARCH64_AVAILABLE
-  if (Arch == Triple::aarch64)
-    return createAArch64MCPlusBuilder(Analysis, Info, RegInfo, STI);
-#endif
-
-  llvm_unreachable("architecture unsupported by MCPlusBuilder");
-}
-
-} // anonymous namespace
-
 #define DEBUG_TYPE "bolt"
 
 Expected<std::unique_ptr<MachORewriteInstance>>
@@ -103,7 +73,8 @@ MachORewriteInstance::MachORewriteInstance(object::MachOObjectFile *InputFile,
     : InputFile(InputFile), ToolPath(ToolPath) {
   ErrorAsOutParameter EAO(&Err);
   auto BCOrErr = BinaryContext::createBinaryContext(
-      InputFile, /* IsPIC */ true, DWARFContext::create(*InputFile),
+      InputFile->makeTriple(), InputFile->getFileName(), nullptr,
+      /* IsPIC */ true, DWARFContext::create(*InputFile),
       {llvm::outs(), llvm::errs()});
   if (Error E = BCOrErr.takeError()) {
     Err = std::move(E);
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index db093bfc2d8b78..2fdf7a89d901a7 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -269,6 +269,10 @@ namespace bolt {
 
 extern const char *BoltRevision;
 
+// Weird location for createMCPlusBuilder, but it lives here to avoid a cyclic
+// dependency between libCore (its natural place) and libTarget: libRewrite
+// may depend on libTarget, but libCore may not. Since libRewrite is the only
+// user of this function, we define it here.
 MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch,
                                    const MCInstrAnalysis *Analysis,
                                    const MCInstrInfo *Info,
@@ -346,8 +350,21 @@ RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc,
   Stderr.SetUnbuffered();
   LLVM_DEBUG(dbgs().SetUnbuffered());
 
+  // Read RISCV subtarget features from input file
+  std::unique_ptr<SubtargetFeatures> Features;
+  Triple TheTriple = File->makeTriple();
+  if (TheTriple.getArch() == llvm::Triple::riscv64) {
+    Expected<SubtargetFeatures> FeaturesOrErr = File->getFeatures();
+    if (auto E = FeaturesOrErr.takeError()) {
+      Err = std::move(E);
+      return;
+    } else {
+      Features.reset(new SubtargetFeatures(*FeaturesOrErr));
+    }
+  }
+
   auto BCOrErr = BinaryContext::createBinaryContext(
-      File, IsPIC,
+      TheTriple, File->getFileName(), Features.get(), IsPIC,
       DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore,
                            nullptr, opts::DWPPathName,
                            WithColor::defaultErrorHandler,
diff --git a/bolt/unittests/CMakeLists.txt b/bolt/unittests/CMakeLists.txt
index 77159e92dec557..de874476d2fc6a 100644
--- a/bolt/unittests/CMakeLists.txt
+++ b/bolt/unittests/CMakeLists.txt
@@ -7,3 +7,4 @@ endfunction()
 
 add_subdirectory(Core)
 add_subdirectory(Profile)
+add_subdirectory(Rewrite)
diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp
index 1fbb07bca966a7..19f0b22486583d 100644
--- a/bolt/unittests/Core/BinaryContext.cpp
+++ b/bolt/unittests/Core/BinaryContext.cpp
@@ -40,8 +40,8 @@ struct BinaryContextTester : public testing::TestWithParam<Triple::ArchType> {
 
   void initializeBOLT() {
     BC = cantFail(BinaryContext::createBinaryContext(
-        ObjFile.get(), true, DWARFContext::create(*ObjFile.get()),
-        {llvm::outs(), llvm::errs()}));
+        ObjFile->makeTriple(), ObjFile->getFileName(), nullptr, true,
+        DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()}));
     ASSERT_FALSE(!BC);
   }
 
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp b/bolt/unittests/Core/MCPlusBuilder.cpp
index 63448039c53e67..240319aa71b866 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -50,8 +50,8 @@ struct MCPlusBuilderTester : public testing::TestWithParam<Triple::ArchType> {
 
   void initializeBolt() {
     BC = cantFail(BinaryContext::createBinaryContext(
-        ObjFile.get(), true, DWARFContext::create(*ObjFile.get()),
-        {llvm::outs(), llvm::errs()}));
+        ObjFile->makeTriple(), ObjFile->getFileName(), nullptr, true,
+        DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()}));
     ASSERT_FALSE(!BC);
     BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(
         createMCPlusBuilder(GetParam(), BC->MIA.get(), BC->MII.get(),
diff --git a/bolt/unittests/Rewrite/CMakeLists.txt b/bolt/unittests/Rewrite/CMakeLists.txt
new file mode 100644
index 00000000000000..89da98fb5f4cdd
--- /dev/null
+++ b/bolt/unittests/Rewrite/CMakeLists.txt
@@ -0,0 +1,27 @@
+set(LLVM_LINK_COMPONENTS
+  DebugInfoDWARF
+  Object
+  MC
+  ${LLVM_TARGETS_TO_BUILD}
+  )
+
+add_bolt_unittest(BOLTRewriteTests
+  JITRewriteInstance.cpp
+
+  DISABLE_LLVM_LINK_LLVM_DYLIB
+  )
+
+target_link_libraries(BOLTRewriteTests
+  PRIVATE
+  LLVMBOLTCore
+  LLVMBOLTRewrite
+  )
+
+foreach (tgt ${BOLT_TARGETS_TO_BUILD})
+  include_directories(
+    ${LLVM_MAIN_SRC_DIR}/lib/Target/${tgt}
+    ${LLVM_BINARY_DIR}/lib/Target/${tgt}
+  )
+  string(TOUPPER "${tgt}" upper)
+  target_compile_definitions(BOLTRewriteTests PRIVATE "${upper}_AVAILABLE")
+endforeach()
diff --git a/bolt/unittests/Rewrite/JITRewriteInstance.cpp b/bolt/unittests/Rewrite/JITRewriteInstance.cpp
new file mode 100644
index 00000000000000..185b770769c3c9
--- /dev/null
+++ b/bolt/unittests/Rewrite/JITRewriteInstance.cpp
@@ -0,0 +1,99 @@
+#include "bolt/Rewrite/JITRewriteInstance.h"
+#include "bolt/Core/BinaryContext.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::ELF;
+using namespace bolt;
+
+namespace {
+struct JITRewriteInstanceTester
+    : public testing::TestWithParam<Triple::ArchType> {
+  void SetUp() override {
+    initalizeLLVM();
+    initializeBOLT();
+  }
+
+protected:
+  void initalizeLLVM() {
+    llvm::InitializeAllTargetInfos();
+    llvm::InitializeAllTargetMCs();
+    llvm::InitializeAllAsmParsers();
+    llvm::InitializeAllDisassemblers();
+    llvm::InitializeAllTargets();
+    llvm::InitializeAllAsmPrinters();
+  }
+
+  void initializeBOLT() {
+    BOLTJIT = cantFail(bolt::JITRewriteInstance::createJITRewriteInstance(
+        {llvm::outs(), llvm::errs()}, /*IsPIC*/ false));
+    ASSERT_FALSE(!BOLTJIT);
+  }
+
+  std::unique_ptr<JITRewriteInstance> BOLTJIT;
+};
+} // namespace
+
+#ifdef X86_AVAILABLE
+
+// clang-format off
+extern "C" __attribute((naked)) int fib(int n)
+{
+  __asm__ __volatile__(
+    "pushq   %%r14\n"
+    "pushq   %%rbx\n"
+    "pushq   %%rax\n"
+    "movl    %%edi, %%r14d\n"
+    "xorl    %%ebx, %%ebx\n"
+    "cmpl    $0x2, %%edi\n"
+    "jge     .Ltmp0\n"
+    "movl    %%r14d, %%ecx\n"
+    "jmp     .Ltmp1\n"
+    ".Ltmp0:\n"
+    "xorl    %%ebx, %%ebx\n"
+    "nopw    %%cs:(%%rax,%%rax)\n"
+    ".Ltmp2:\n"
+    "leal    -0x1(%%r14), %%edi\n"
+    "callq   fib\n"
+    "leal    -0x2(%%r14), %%ecx\n"
+    "addl    %%eax, %%ebx\n"
+    "cmpl    $0x3, %%r14d\n"
+    "movl    %%ecx, %%r14d\n"
+    "ja      .Ltmp2\n"
+    ".Ltmp1:\n"
+    "addl    %%ecx, %%ebx\n"
+    "movl    %%ebx, %%eax\n"
+    "addq    $0x8, %%rsp\n"
+    "popq    %%rbx\n"
+    "popq    %%r14\n"
+    "retq\n"
+    :::);
+}
+// clang-format on
+
+INSTANTIATE_TEST_SUITE_P(X86, JITRewriteInstanceTester,
+                         ::testing::Values(Triple::x86_64));
+
+TEST_P(JITRewriteInstanceTester, DisassembleFib) {
+  EXPECT_EQ(fib(7), 13);
+
+  // BOLT JIT test/example
+  // Analyze fib function in this binary
+  // Disassemble 63 bytes
+  uint64_t Address = reinterpret_cast<uint64_t>(&fib);
+  StringRef Data = StringRef(reinterpret_cast<const char *>(&fib), 63);
+
+  BOLTJIT->registerJITSection(StringRef(".text.example"), Address, Data, 1,
+                              ELF::SHT_PROGBITS,
+                              ELF::SHF_ALLOC | ELF::SHF_EXECINSTR);
+  BOLTJIT->registerJITFunction(StringRef("fib"), Address, 63);
+  ASSERT_FALSE(BOLTJIT->run());
+
+  // Print to screen
+  BOLTJIT->printAll(outs());
+}
+
+#endif



More information about the llvm-branch-commits mailing list