[llvm-branch-commits] [llvm] [BOLT] Add binary introspection/JIT manager (PR #81346)
Rafael Auler via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Feb 9 19:54:52 PST 2024
https://github.com/rafaelauler created https://github.com/llvm/llvm-project/pull/81346
Add a class that allows a process to introspect or investigate itself by disassembling its memory contents just-in-time with BOLT. An example is shown in a new unittest binary.
This leverages the new ability to use BOLT as a library instead of as a regular executable that processes input binaries, demonstrating how to use BOLT as a library.
>From 1167a59a185cbc113d6ca30f223e09be1abc8494 Mon Sep 17 00:00:00 2001
From: Rafael Auler <rafaelauler at fb.com>
Date: Thu, 8 Feb 2024 19:57:14 -0800
Subject: [PATCH] [BOLT] Add binary introspection/JIT manager
Add a class that allows a process to introspect or investigate itself
by disassembling its memory contents just-in-time with BOLT. An
example is shown in a new unittest binary.
This leverages the new ability to use BOLT as a library instead of as
a regular executable that processes input binaries, demonstrating how
to use BOLT as a library.
---
bolt/include/bolt/Core/BinaryContext.h | 9 +-
bolt/include/bolt/Core/BinaryFunction.h | 1 +
bolt/include/bolt/Core/BinarySection.h | 3 +-
.../include/bolt/Rewrite/JITRewriteInstance.h | 105 +++++
bolt/lib/Core/BinaryContext.cpp | 39 +-
bolt/lib/Rewrite/CMakeLists.txt | 1 +
bolt/lib/Rewrite/DWARFRewriter.cpp | 2 +-
bolt/lib/Rewrite/JITRewriteInstance.cpp | 367 ++++++++++++++++++
bolt/lib/Rewrite/MachORewriteInstance.cpp | 35 +-
bolt/lib/Rewrite/RewriteInstance.cpp | 19 +-
bolt/unittests/CMakeLists.txt | 1 +
bolt/unittests/Core/BinaryContext.cpp | 4 +-
bolt/unittests/Core/MCPlusBuilder.cpp | 4 +-
bolt/unittests/Rewrite/CMakeLists.txt | 27 ++
bolt/unittests/Rewrite/JITRewriteInstance.cpp | 99 +++++
15 files changed, 654 insertions(+), 62 deletions(-)
create mode 100644 bolt/include/bolt/Rewrite/JITRewriteInstance.h
create mode 100644 bolt/lib/Rewrite/JITRewriteInstance.cpp
create mode 100644 bolt/unittests/Rewrite/CMakeLists.txt
create mode 100644 bolt/unittests/Rewrite/JITRewriteInstance.cpp
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 30336c4e3a74fe..e5af8685d9b0d0 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -212,9 +212,6 @@ class BinaryContext {
/// input file to internal section representation.
DenseMap<SectionRef, BinarySection *> SectionRefToBinarySection;
- /// Low level section registration.
- BinarySection &registerSection(BinarySection *Section);
-
/// Store all functions in the binary, sorted by original address.
std::map<uint64_t, BinaryFunction> BinaryFunctions;
@@ -265,7 +262,8 @@ class BinaryContext {
public:
static Expected<std::unique_ptr<BinaryContext>>
- createBinaryContext(const ObjectFile *File, bool IsPIC,
+ createBinaryContext(Triple TheTriple, StringRef InputFileName,
+ SubtargetFeatures *Features, bool IsPIC,
std::unique_ptr<DWARFContext> DwCtx,
JournalingStreams Logger);
@@ -1049,6 +1047,9 @@ class BinaryContext {
BinarySection &registerSection(const Twine &SectionName,
const BinarySection &OriginalSection);
+ /// Low level section registration.
+ BinarySection &registerSection(BinarySection *Section);
+
/// Register or update the information for the section with the given
/// /p Name. If the section already exists, the information in the
/// section will be updated with the new data.
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index a177178769e456..68c626d9a2f452 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -655,6 +655,7 @@ class BinaryFunction {
BinaryFunction(const BinaryFunction &) = delete;
friend class MachORewriteInstance;
+ friend class JITRewriteInstance;
friend class RewriteInstance;
friend class BinaryContext;
friend class DataReader;
diff --git a/bolt/include/bolt/Core/BinarySection.h b/bolt/include/bolt/Core/BinarySection.h
index a85dbf28950e31..d4f9b5955b2029 100644
--- a/bolt/include/bolt/Core/BinarySection.h
+++ b/bolt/include/bolt/Core/BinarySection.h
@@ -50,7 +50,7 @@ class BinarySection {
std::string Name; // Section name
const SectionRef Section; // SectionRef for input binary sections.
StringRef Contents; // Input section contents
- const uint64_t Address; // Address of section in input binary (may be 0)
+ uint64_t Address; // Address of section in input binary (may be 0)
const uint64_t Size; // Input section size
uint64_t InputFileOffset{0}; // Offset in the input binary
unsigned Alignment; // alignment in bytes (must be > 0)
@@ -461,6 +461,7 @@ class BinarySection {
uint32_t getIndex() const { return Index; }
// mutation
+ void setAddress(uint64_t Address) { this->Address = Address; }
void setOutputAddress(uint64_t Address) { OutputAddress = Address; }
void setOutputFileOffset(uint64_t Offset) { OutputFileOffset = Offset; }
void setSectionID(StringRef ID) {
diff --git a/bolt/include/bolt/Rewrite/JITRewriteInstance.h b/bolt/include/bolt/Rewrite/JITRewriteInstance.h
new file mode 100644
index 00000000000000..9833845deaa3d6
--- /dev/null
+++ b/bolt/include/bolt/Rewrite/JITRewriteInstance.h
@@ -0,0 +1,105 @@
+//===- bolt/Rewrite/JITRewriteInstance.h - in-memory rewriter ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to control BOLT as JIT library
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_JIT_REWRITE_REWRITE_INSTANCE_H
+#define BOLT_JIT_REWRITE_REWRITE_INSTANCE_H
+
+#include "bolt/Utils/NameResolver.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/StringSaver.h"
+#include <memory>
+
+namespace llvm {
+
+namespace object {
+class ObjectFile;
+}
+
+namespace bolt {
+
+class BinaryContext;
+class ProfileReaderBase;
+struct JournalingStreams;
+
+/// Allows a process to introspect itself by running BOLT to disassemble its
+/// own address space.
+class JITRewriteInstance {
+  std::unique_ptr<BinaryContext> BC;
+  NameResolver NR;
+  // StrAllocator is declared before StrPool on purpose: members are
+  // constructed in declaration order (and destroyed in reverse), and StrPool
+  // is constructed from StrAllocator.
+  BumpPtrAllocator StrAllocator;
+  StringSaver StrPool;
+  std::unique_ptr<ProfileReaderBase> ProfileReader;
+
+  void adjustCommandLineOptions();
+  Error preprocessProfileData();
+  Error processProfileDataPreCFG();
+  Error processProfileData();
+  Error disassembleFunctions();
+  Error buildFunctionsCFG();
+  void postProcessFunctions();
+  /// Private: instances are created through createJITRewriteInstance, which
+  /// turns the \p Err out-parameter into an Expected<>.
+  JITRewriteInstance(JournalingStreams Logger, bool IsPIC, Error &Err);
+
+public:
+  /// Create BOLT data structures/interface to deal with disassembly. Logger
+  /// contains the streams used for BOLT to report events (regular or errors)
+  /// that might happen while BOLT is trying to reconstruct a function from
+  /// binary level.
+  static Expected<std::unique_ptr<JITRewriteInstance>>
+  createJITRewriteInstance(JournalingStreams Logger, bool IsPIC);
+  ~JITRewriteInstance();
+
+  /// This is the main entry point used to make BOLT aware of a fragment of
+  /// memory space in the process. The user might need to reconstruct the
+  /// original ELF type/flags, such as using SHT_PROGBITS to indicate an
+  /// allocatable region and flags SHF_ALLOC | SHF_EXECINSTR to flag a section
+  /// containing code.
+  void registerJITSection(StringRef Name, uint64_t Address, StringRef Data,
+                          unsigned Alignment, unsigned ELFType,
+                          unsigned ELFFlags);
+
+  /// Communicate to BOLT the boundaries of a function in a section of memory
+  /// previously registered with registerJITSection.
+  void registerJITFunction(StringRef Name, uintptr_t Addr, size_t Size);
+
+  /// In case the user is using LLVM as an in-process JIT, and the user has
+  /// access to the ObjectFile instance loaded in memory, instead of using the
+  /// registerJITSection/registerJITFunction pair, the user can just forward
+  /// that object here and JITRewriteInstance will read this object and
+  /// register its sections and functions to map this object to BOLT.
+  Error notifyObjectLoaded(const object::ObjectFile &Obj);
+
+  /// Mark all functions added so far as non-simple, so BOLT will skip them.
+  void disableAllFunctions();
+
+  /// Mark a specific function as simple, so BOLT will try to disassemble it.
+  void processFunctionContaining(uint64_t Address);
+
+  /// Supply a profile file for BOLT to attach edge counts to the disassembled
+  /// functions.
+  Error setProfile(StringRef FileName);
+
+  /// Run all the necessary steps to disassemble registered sections and
+  /// functions (process what we have so far).
+  Error run();
+
+  /// Print all of BOLT's processed functions.
+  void printAll(raw_ostream &OS);
+
+  /// Print a specific function processed by BOLT.
+  void printFunctionContaining(raw_ostream &OS, uint64_t Address);
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index d544ece13a832f..9a2780e8f3cecc 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -162,28 +162,30 @@ BinaryContext::~BinaryContext() {
/// Create BinaryContext for a given architecture \p ArchName and
/// triple \p TripleName.
-Expected<std::unique_ptr<BinaryContext>>
-BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
- std::unique_ptr<DWARFContext> DwCtx,
- JournalingStreams Logger) {
+Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
+ Triple TheTriple, StringRef InputFileName, SubtargetFeatures *Features,
+ bool IsPIC, std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) {
StringRef ArchName = "";
std::string FeaturesStr = "";
- switch (File->getArch()) {
+ switch (TheTriple.getArch()) {
case llvm::Triple::x86_64:
+ if (Features)
+ return createFatalBOLTError(
+ "x86_64 target does not use SubtargetFeatures");
ArchName = "x86-64";
FeaturesStr = "+nopl";
break;
case llvm::Triple::aarch64:
+ if (Features)
+ return createFatalBOLTError(
+ "AArch64 target does not use SubtargetFeatures");
ArchName = "aarch64";
FeaturesStr = "+all";
break;
case llvm::Triple::riscv64: {
ArchName = "riscv64";
- Expected<SubtargetFeatures> Features = File->getFeatures();
-
- if (auto E = Features.takeError())
- return std::move(E);
-
+ if (!Features)
+ return createFatalBOLTError("RISCV target needs SubtargetFeatures");
// We rely on relaxation for some transformations (e.g., promoting all calls
// to PseudoCALL and then making JITLink relax them). Since the relax
// feature is not stored in the object file, we manually enable it.
@@ -196,12 +198,11 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
"BOLT-ERROR: Unrecognized machine in ELF file");
}
- auto TheTriple = std::make_unique<Triple>(File->makeTriple());
- const std::string TripleName = TheTriple->str();
+ const std::string TripleName = TheTriple.str();
std::string Error;
const Target *TheTarget =
- TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
+ TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error);
if (!TheTarget)
return createStringError(make_error_code(std::errc::not_supported),
Twine("BOLT-ERROR: ", Error));
@@ -240,13 +241,13 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
Twine("BOLT-ERROR: no instruction info for target ", TripleName));
std::unique_ptr<MCContext> Ctx(
- new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
+ new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
std::unique_ptr<MCObjectFileInfo> MOFI(
TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
Ctx->setObjectFileInfo(MOFI.get());
// We do not support X86 Large code model. Change this in the future.
bool Large = false;
- if (TheTriple->getArch() == llvm::Triple::aarch64)
+ if (TheTriple.getArch() == llvm::Triple::aarch64)
Large = true;
unsigned LSDAEncoding =
Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
@@ -273,7 +274,7 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
std::unique_ptr<MCInstPrinter> InstructionPrinter(
- TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
+ TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo,
*MII, *MRI));
if (!InstructionPrinter)
return createStringError(
@@ -285,8 +286,8 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
TheTarget->createMCCodeEmitter(*MII, *Ctx));
auto BC = std::make_unique<BinaryContext>(
- std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
- std::string(TripleName), std::move(MCE), std::move(MOFI),
+ std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple),
+ TheTarget, std::string(TripleName), std::move(MCE), std::move(MOFI),
std::move(AsmInfo), std::move(MII), std::move(STI),
std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
std::move(DisAsm), Logger);
@@ -296,7 +297,7 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
BC->MAB = std::unique_ptr<MCAsmBackend>(
BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));
- BC->setFilename(File->getFileName());
+ BC->setFilename(InputFileName);
BC->HasFixedLoadAddress = !IsPIC;
diff --git a/bolt/lib/Rewrite/CMakeLists.txt b/bolt/lib/Rewrite/CMakeLists.txt
index 6890f52e2b28bb..0bb80fbeef277e 100644
--- a/bolt/lib/Rewrite/CMakeLists.txt
+++ b/bolt/lib/Rewrite/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_library(LLVMBOLTRewrite
DWARFRewriter.cpp
ExecutableFileMemoryManager.cpp
JITLinkLinker.cpp
+ JITRewriteInstance.cpp
LinuxKernelRewriter.cpp
MachORewriteInstance.cpp
MetadataManager.cpp
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index 27fa937c7508c3..6fb9e9fafd0e38 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -1652,7 +1652,7 @@ namespace {
std::unique_ptr<BinaryContext>
createDwarfOnlyBC(const object::ObjectFile &File) {
return cantFail(BinaryContext::createBinaryContext(
- &File, false,
+ File.makeTriple(), File.getFileName(), nullptr, false,
DWARFContext::create(File, DWARFContext::ProcessDebugRelocations::Ignore,
nullptr, "", WithColor::defaultErrorHandler,
WithColor::defaultWarningHandler),
diff --git a/bolt/lib/Rewrite/JITRewriteInstance.cpp b/bolt/lib/Rewrite/JITRewriteInstance.cpp
new file mode 100644
index 00000000000000..51537d261f0359
--- /dev/null
+++ b/bolt/lib/Rewrite/JITRewriteInstance.cpp
@@ -0,0 +1,367 @@
+//===- bolt/Rewrite/JITRewriteInstance.cpp - JIT rewriter -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Rewrite/JITRewriteInstance.h"
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Core/BinaryEmitter.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/JumpTable.h"
+#include "bolt/Core/MCPlusBuilder.h"
+#include "bolt/Profile/DataAggregator.h"
+#include "bolt/Rewrite/BinaryPassManager.h"
+#include "bolt/Rewrite/RewriteInstance.h"
+#include "bolt/Utils/Utils.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/FileSystem.h"
+#include <memory>
+
+namespace opts {
+
+using namespace llvm;
+extern cl::opt<unsigned> AlignText;
+extern cl::opt<bool> PrintSections;
+extern cl::opt<bool> PrintDisasm;
+extern cl::opt<bool> PrintCFG;
+extern cl::opt<unsigned> Verbosity;
+} // namespace opts
+
+namespace llvm {
+namespace bolt {
+
+#define DEBUG_TYPE "bolt"
+
+/// Factory for JITRewriteInstance. The constructor reports failure through an
+/// Error out-parameter; this wrapper converts that into an Expected<> so the
+/// caller receives either a usable instance or the error.
+Expected<std::unique_ptr<JITRewriteInstance>>
+JITRewriteInstance::createJITRewriteInstance(JournalingStreams Logger,
+                                             bool IsPIC) {
+  Error CtorErr = Error::success();
+  // The constructor is private, so the instance is built with a plain new
+  // expression that is immediately owned by the unique_ptr.
+  std::unique_ptr<JITRewriteInstance> Instance(
+      new JITRewriteInstance(Logger, IsPIC, CtorErr));
+  if (!CtorErr)
+    return std::move(Instance);
+  return std::move(CtorErr);
+}
+
+/// Build a BinaryContext for the default target triple and attach the matching
+/// MCPlusBuilder. If the context cannot be created the error is handed back
+/// through \p Err and BC is left unset.
+JITRewriteInstance::JITRewriteInstance(JournalingStreams Logger, bool IsPIC,
+                                       Error &Err)
+    : StrPool(StrAllocator) {
+  ErrorAsOutParameter EAO(&Err);
+  Triple TheTriple(sys::getDefaultTargetTriple().c_str());
+
+  // No object file backs this context: there are no subtarget features and no
+  // DWARF context to forward, and the "file name" is only a label.
+  auto ContextOrErr = BinaryContext::createBinaryContext(
+      TheTriple, StringRef("JIT input file"), /*Features=*/nullptr, IsPIC,
+      /*DwCtx=*/nullptr, Logger);
+  if (!ContextOrErr) {
+    Err = ContextOrErr.takeError();
+    return;
+  }
+  BC = std::move(*ContextOrErr);
+
+  std::unique_ptr<MCPlusBuilder> Builder(
+      createMCPlusBuilder(BC->TheTriple->getArch(), BC->MIA.get(),
+                          BC->MII.get(), BC->MRI.get(), BC->STI.get()));
+  BC->initializeTarget(std::move(Builder));
+  BC->FirstAllocAddress = 0;
+  BC->LayoutStartAddress = 0xffffffffffffffff;
+}
+
+// Defined out of line: the header only forward-declares the types held by the
+// unique_ptr members, so the destructor is emitted in this file instead.
+JITRewriteInstance::~JITRewriteInstance() = default;
+
+/// Default the text alignment option to the context's page alignment unless
+/// the option was given explicitly.
+void JITRewriteInstance::adjustCommandLineOptions() {
+  const bool UserSetAlignText = opts::AlignText.getNumOccurrences() != 0;
+  if (UserSetAlignText)
+    return;
+  opts::AlignText = BC->PageAlign;
+}
+
+/// Let the profile reader, if one was set, preprocess its input. Succeeds
+/// trivially when no profile was supplied.
+Error JITRewriteInstance::preprocessProfileData() {
+  if (ProfileReader)
+    return ProfileReader->preprocessProfile(*BC);
+  return Error::success();
+}
+
+/// Feed the profile reader, if one was set, the part of the profile it wants
+/// to see before CFGs are built. Succeeds trivially when no profile was
+/// supplied.
+Error JITRewriteInstance::processProfileDataPreCFG() {
+  if (ProfileReader)
+    return ProfileReader->readProfilePreCFG(*BC);
+  return Error::success();
+}
+
+/// Have the profile reader, if one was set, read the profile into the
+/// context. Succeeds trivially when no profile was supplied.
+Error JITRewriteInstance::processProfileData() {
+  if (ProfileReader)
+    return ProfileReader->readProfile(*BC);
+  return Error::success();
+}
+
+/// Disassemble every function still marked simple, stopping at the first
+/// function that reports an error. With the PrintDisasm option set, each
+/// function is printed to the context's output stream right after it is
+/// disassembled.
+Error JITRewriteInstance::disassembleFunctions() {
+  for (auto &Entry : BC->getBinaryFunctions()) {
+    BinaryFunction &BF = Entry.second;
+    if (BF.isSimple()) {
+      if (Error DisasmErr = BF.disassemble())
+        return DisasmErr;
+      if (opts::PrintDisasm)
+        BF.print(BC->outs(), "after disassembly");
+    }
+  }
+  return Error::success();
+}
+
+/// Build the CFG of every function still marked simple. Fatal BOLT errors
+/// abort the walk and are returned; non-fatal ones are logged to the
+/// context's error stream (when they carry a message) and then dropped.
+Error JITRewriteInstance::buildFunctionsCFG() {
+  auto FilterNonFatal = [&](const BOLTError &E) -> Error {
+    if (E.isFatal())
+      return Error(std::make_unique<BOLTError>(std::move(E)));
+    if (!E.getMessage().empty())
+      E.log(BC->errs());
+    return Error::success();
+  };
+
+  for (auto &Entry : BC->getBinaryFunctions()) {
+    BinaryFunction &BF = Entry.second;
+    if (!BF.isSimple())
+      continue;
+    if (Error Remaining =
+            handleErrors(BF.buildCFG(/*AllocId*/ 0), FilterNonFatal))
+      return Remaining;
+  }
+  return Error::success();
+}
+
+/// Post-process the CFG of every non-empty function still marked simple.
+/// With the PrintCFG option set, each function is printed afterwards to the
+/// context's output stream, i.e. the stream supplied through the logger, as
+/// the disassembly printing does.
+void JITRewriteInstance::postProcessFunctions() {
+  for (auto &BFI : BC->getBinaryFunctions()) {
+    BinaryFunction &Function = BFI.second;
+    if (Function.empty() || !Function.isSimple())
+      continue;
+    Function.postProcessCFG();
+    if (opts::PrintCFG)
+      Function.print(BC->outs(), "after building cfg");
+  }
+}
+
+/// Wrap the memory range described by \p Data in a BinarySection located at
+/// \p Address and register it with the context. The bytes are not copied here;
+/// the section is created over the caller's buffer.
+void JITRewriteInstance::registerJITSection(StringRef Name, uint64_t Address,
+                                            StringRef Data, unsigned Alignment,
+                                            unsigned ELFType,
+                                            unsigned ELFFlags) {
+  uint8_t *Bytes = const_cast<uint8_t *>(Data.bytes_begin());
+  auto *NewSection = new BinarySection(*BC, Name, Bytes, Data.size(),
+                                       Alignment, ELFType, ELFFlags);
+  NewSection->setAddress(Address);
+  // NOTE(review): presumably the context takes ownership of the section from
+  // here on -- confirm against BinaryContext::registerSection.
+  BC->registerSection(NewSection);
+}
+
+/// Create a BinaryFunction named \p Name covering [\p Addr, \p Addr + \p Size)
+/// inside the registered section that contains \p Addr. The request is
+/// silently ignored when no registered section contains that address.
+void JITRewriteInstance::registerJITFunction(StringRef Name, uintptr_t Addr,
+                                             size_t Size) {
+  ErrorOr<BinarySection &> Owner = BC->getSectionForAddress(Addr);
+  if (!Owner)
+    return;
+  BC->createBinaryFunction(Name.str(), *Owner, Addr, Size);
+}
+
+/// Register every named, non-empty, non-NOBITS section of \p Obj together with
+/// its function symbols. Section contents are read directly from memory at the
+/// address each section reports, so \p Obj must describe code that is already
+/// loaded at those addresses in this process.
+///
+/// Function sizes are not taken from the symbol table: each function extends
+/// up to the next function symbol with a different address if that symbol is
+/// in the same section, and to the end of its own section otherwise. Symbols
+/// sharing an address become alternative names of one function.
+///
+/// \returns an error if querying \p Obj fails, success otherwise.
+// NOTE(review): sections are converted to ELFSectionRef unconditionally, so
+// this presumably expects an ELF object -- confirm against callers.
+Error JITRewriteInstance::notifyObjectLoaded(const object::ObjectFile &Obj) {
+  for (const object::SectionRef &Section : Obj.sections()) {
+    Expected<StringRef> SectionName = Section.getName();
+    if (Error E = SectionName.takeError())
+      return E;
+    // Only register sections with names.
+    if (SectionName->empty())
+      continue;
+
+    StringRef UniqueSectionName = StrPool.save(NR.uniquify(*SectionName));
+    unsigned ELFType = ELFSectionRef(Section).getType();
+    unsigned ELFFlags = ELFSectionRef(Section).getFlags();
+    if (ELFType == ELF::SHT_NOBITS)
+      continue;
+
+    const uint64_t Address = Section.getAddress();
+    const uint64_t Size = Section.getSize();
+    // The bytes come from this process's memory at the section address, not
+    // from the object file's buffer.
+    StringRef Contents =
+        StringRef(reinterpret_cast<const char *>(Address), Size);
+    if (Contents.empty())
+      continue;
+
+    this->registerJITSection(UniqueSectionName, Address, Contents,
+                             Section.getAlignment().value(), ELFType, ELFFlags);
+    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: registering section " << *SectionName
+                      << " @ 0x" << Twine::utohexstr(Address) << ":0x"
+                      << Twine::utohexstr(Address + Size) << "\n");
+  }
+
+  if (opts::PrintSections) {
+    BC->outs() << "BOLT-INFO: Sections from original binary:\n";
+    BC->printSections(BC->outs());
+  }
+
+  std::vector<SymbolRef> FunctionSymbols;
+  for (const SymbolRef &S : Obj.symbols()) {
+    auto TypeOrErr = S.getType();
+    if (Error E = TypeOrErr.takeError())
+      return E;
+    if (*TypeOrErr == SymbolRef::ST_Function)
+      FunctionSymbols.push_back(S);
+  }
+
+  if (FunctionSymbols.empty())
+    return Error::success();
+
+  // Sort by address so that the end of a function can be derived from the
+  // start of the next one. Errors cannot escape a comparator, so they are
+  // collected on the side and reported once sorting is done.
+  Error SortErrors = Error::success();
+  llvm::stable_sort(FunctionSymbols, [&](const SymbolRef &LHS,
+                                         const SymbolRef &RHS) {
+    auto LHSAddrOrErr = LHS.getAddress();
+    auto RHSAddrOrErr = RHS.getAddress();
+    if (auto E =
+            joinErrors(LHSAddrOrErr.takeError(), RHSAddrOrErr.takeError())) {
+      SortErrors = joinErrors(std::move(SortErrors), std::move(E));
+      return false;
+    }
+    return *LHSAddrOrErr < *RHSAddrOrErr;
+  });
+  if (SortErrors)
+    return SortErrors;
+
+  const size_t NumFunctions = FunctionSymbols.size();
+  for (size_t Index = 0; Index < NumFunctions; ++Index) {
+    auto AddrOrErr = FunctionSymbols[Index].getAddress();
+    if (auto E = AddrOrErr.takeError())
+      return E;
+
+    const uint64_t Address = *AddrOrErr;
+    ErrorOr<BinarySection &> Section = BC->getSectionForAddress(Address);
+    if (!Section)
+      continue;
+
+    auto NameOrErr = FunctionSymbols[Index].getName();
+    auto FlagsOrErr = FunctionSymbols[Index].getFlags();
+    auto SecOrErr = FunctionSymbols[Index].getSection();
+    if (auto E = joinErrors(
+            joinErrors(NameOrErr.takeError(), FlagsOrErr.takeError()),
+            SecOrErr.takeError()))
+      return E;
+    std::string SymbolName = NameOrErr->str();
+    // Uniquify names of local symbols.
+    if (!(*FlagsOrErr & SymbolRef::SF_Global))
+      SymbolName = NR.uniquify(SymbolName);
+
+    section_iterator S = *SecOrErr;
+    // A symbol without a containing section has no section end to fall back
+    // on; dereferencing the end iterator would be invalid, so skip it.
+    if (S == Obj.section_end())
+      continue;
+    uint64_t EndAddress = S->getAddress() + S->getSize();
+
+    // Skip aliases: advance to the first following symbol whose address
+    // differs. Every access is bounds-checked because the current symbol (or
+    // its aliases) may be the last entries of the list.
+    size_t NFIndex = Index + 1;
+    uint64_t NextAddr = 0;
+    while (NFIndex < NumFunctions) {
+      auto NFAddrOrErr = FunctionSymbols[NFIndex].getAddress();
+      if (auto E = NFAddrOrErr.takeError())
+        return E;
+      NextAddr = *NFAddrOrErr;
+      if (NextAddr != Address)
+        break;
+      ++NFIndex;
+    }
+
+    // If a following function lives in the same section, this function ends
+    // where that one starts; otherwise it runs to the end of its section.
+    if (NFIndex < NumFunctions) {
+      auto NFSecOrErr = FunctionSymbols[NFIndex].getSection();
+      if (auto E = NFSecOrErr.takeError())
+        return E;
+      if (S == *NFSecOrErr)
+        EndAddress = NextAddr;
+    }
+
+    const uint64_t SymbolSize = EndAddress - Address;
+    const auto It = BC->getBinaryFunctions().find(Address);
+    if (It == BC->getBinaryFunctions().end()) {
+      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: creating binary function for "
+                        << SymbolName << "\n");
+      BC->createBinaryFunction(std::move(SymbolName), *Section, Address,
+                               SymbolSize);
+    } else {
+      It->second.addAlternativeName(std::move(SymbolName));
+    }
+  }
+
+  for (auto &BFI : BC->getBinaryFunctions()) {
+    BinaryFunction &Function = BFI.second;
+    Function.setMaxSize(Function.getSize());
+
+    ErrorOr<ArrayRef<uint8_t>> FunctionData = Function.getData();
+    if (!FunctionData) {
+      BC->errs() << "BOLT-ERROR: corresponding section is non-executable or "
+                 << "empty for function " << Function << '\n';
+      continue;
+    }
+
+    // Zero-sized functions cannot be disassembled; have BOLT skip them.
+    if (Function.getSize() == 0)
+      Function.setSimple(false);
+  }
+
+  return Error::success();
+}
+
+/// Clear the simple flag on every function known to the context so that
+/// run() skips all of them unless they are re-enabled individually.
+void JITRewriteInstance::disableAllFunctions() {
+  for (auto &Entry : BC->getBinaryFunctions())
+    Entry.second.setSimple(false);
+}
+
+/// Mark the function containing \p Address as simple so that run() processes
+/// it. Does nothing when no known function contains the address.
+void JITRewriteInstance::processFunctionContaining(uint64_t Address) {
+  BinaryFunction *Target = BC->getBinaryFunctionContainingAddress(Address);
+  if (!Target)
+    return;
+  Target->setSimple(true);
+}
+
+/// Install a DataAggregator reading from \p Filename as the profile source.
+/// \returns a no_such_file_or_directory error, leaving the current reader
+/// untouched, when the file does not exist.
+Error JITRewriteInstance::setProfile(StringRef Filename) {
+  if (sys::fs::exists(Filename)) {
+    ProfileReader = std::make_unique<DataAggregator>(Filename);
+    return Error::success();
+  }
+  return errorCodeToError(make_error_code(errc::no_such_file_or_directory));
+}
+
+/// Drive the whole pipeline over what has been registered so far: profile
+/// preprocessing, disassembly, pre-CFG profile reading, CFG construction,
+/// profile reading and CFG post-processing, in that order. The first failing
+/// step aborts the run and its error is returned.
+Error JITRewriteInstance::run() {
+  adjustCommandLineOptions();
+
+  if (Error PreprocessErr = preprocessProfileData())
+    return PreprocessErr;
+
+  if (Error DisasmErr = disassembleFunctions())
+    return DisasmErr;
+
+  if (Error PreCFGErr = processProfileDataPreCFG())
+    return PreCFGErr;
+
+  if (Error CFGErr = buildFunctionsCFG())
+    return CFGErr;
+
+  if (Error ProfileErr = processProfileData())
+    return ProfileErr;
+
+  postProcessFunctions();
+  return Error::success();
+}
+
+/// Print every non-empty function known to the context to \p OS.
+void JITRewriteInstance::printAll(raw_ostream &OS) {
+  for (auto &Entry : BC->getBinaryFunctions()) {
+    BinaryFunction &BF = Entry.second;
+    if (!BF.empty())
+      BF.print(OS, "after building cfg");
+  }
+}
+
+/// Print the function containing \p Address to \p OS, preceded by a one-line
+/// banner. Prints nothing when no known function contains the address.
+void JITRewriteInstance::printFunctionContaining(raw_ostream &OS,
+                                                 uint64_t Address) {
+  if (BinaryFunction *Func = BC->getBinaryFunctionContainingAddress(Address)) {
+    OS << formatv("Printing function containing address {0:x}\n", Address);
+    Func->print(OS, "JIT on-demand inspection");
+  }
+}
+
+} // namespace bolt
+} // namespace llvm
diff --git a/bolt/lib/Rewrite/MachORewriteInstance.cpp b/bolt/lib/Rewrite/MachORewriteInstance.cpp
index 0970a0507ebe88..172cb640bf911a 100644
--- a/bolt/lib/Rewrite/MachORewriteInstance.cpp
+++ b/bolt/lib/Rewrite/MachORewriteInstance.cpp
@@ -18,6 +18,7 @@
#include "bolt/Rewrite/BinaryPassManager.h"
#include "bolt/Rewrite/ExecutableFileMemoryManager.h"
#include "bolt/Rewrite/JITLinkLinker.h"
+#include "bolt/Rewrite/RewriteInstance.h"
#include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
#include "bolt/Utils/Utils.h"
#include "llvm/MC/MCObjectStreamer.h"
@@ -54,37 +55,6 @@ extern cl::opt<unsigned> Verbosity;
namespace llvm {
namespace bolt {
-extern MCPlusBuilder *createX86MCPlusBuilder(const MCInstrAnalysis *,
- const MCInstrInfo *,
- const MCRegisterInfo *,
- const MCSubtargetInfo *);
-extern MCPlusBuilder *createAArch64MCPlusBuilder(const MCInstrAnalysis *,
- const MCInstrInfo *,
- const MCRegisterInfo *,
- const MCSubtargetInfo *);
-
-namespace {
-
-MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch,
- const MCInstrAnalysis *Analysis,
- const MCInstrInfo *Info,
- const MCRegisterInfo *RegInfo,
- const MCSubtargetInfo *STI) {
-#ifdef X86_AVAILABLE
- if (Arch == Triple::x86_64)
- return createX86MCPlusBuilder(Analysis, Info, RegInfo, STI);
-#endif
-
-#ifdef AARCH64_AVAILABLE
- if (Arch == Triple::aarch64)
- return createAArch64MCPlusBuilder(Analysis, Info, RegInfo, STI);
-#endif
-
- llvm_unreachable("architecture unsupported by MCPlusBuilder");
-}
-
-} // anonymous namespace
-
#define DEBUG_TYPE "bolt"
Expected<std::unique_ptr<MachORewriteInstance>>
@@ -103,7 +73,8 @@ MachORewriteInstance::MachORewriteInstance(object::MachOObjectFile *InputFile,
: InputFile(InputFile), ToolPath(ToolPath) {
ErrorAsOutParameter EAO(&Err);
auto BCOrErr = BinaryContext::createBinaryContext(
- InputFile, /* IsPIC */ true, DWARFContext::create(*InputFile),
+ InputFile->makeTriple(), InputFile->getFileName(), nullptr,
+ /* IsPIC */ true, DWARFContext::create(*InputFile),
{llvm::outs(), llvm::errs()});
if (Error E = BCOrErr.takeError()) {
Err = std::move(E);
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index db093bfc2d8b78..2fdf7a89d901a7 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -269,6 +269,10 @@ namespace bolt {
extern const char *BoltRevision;
+// Weird location for createMCPlusBuilder, but this is here to avoid a
+// cyclic dependency of libCore (its natural place) and libTarget. libRewrite
+// can depend on libTarget, but not libCore. Since libRewrite is the only
+// user of this function, we define it here.
MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch,
const MCInstrAnalysis *Analysis,
const MCInstrInfo *Info,
@@ -346,8 +350,21 @@ RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc,
Stderr.SetUnbuffered();
LLVM_DEBUG(dbgs().SetUnbuffered());
+ // Read RISCV subtarget features from input file
+ std::unique_ptr<SubtargetFeatures> Features;
+ Triple TheTriple = File->makeTriple();
+ if (TheTriple.getArch() == llvm::Triple::riscv64) {
+ Expected<SubtargetFeatures> FeaturesOrErr = File->getFeatures();
+ if (auto E = FeaturesOrErr.takeError()) {
+ Err = std::move(E);
+ return;
+ } else {
+ Features.reset(new SubtargetFeatures(*FeaturesOrErr));
+ }
+ }
+
auto BCOrErr = BinaryContext::createBinaryContext(
- File, IsPIC,
+ TheTriple, File->getFileName(), Features.get(), IsPIC,
DWARFContext::create(*File, DWARFContext::ProcessDebugRelocations::Ignore,
nullptr, opts::DWPPathName,
WithColor::defaultErrorHandler,
diff --git a/bolt/unittests/CMakeLists.txt b/bolt/unittests/CMakeLists.txt
index 77159e92dec557..de874476d2fc6a 100644
--- a/bolt/unittests/CMakeLists.txt
+++ b/bolt/unittests/CMakeLists.txt
@@ -7,3 +7,4 @@ endfunction()
add_subdirectory(Core)
add_subdirectory(Profile)
+add_subdirectory(Rewrite)
diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp
index 1fbb07bca966a7..19f0b22486583d 100644
--- a/bolt/unittests/Core/BinaryContext.cpp
+++ b/bolt/unittests/Core/BinaryContext.cpp
@@ -40,8 +40,8 @@ struct BinaryContextTester : public testing::TestWithParam<Triple::ArchType> {
void initializeBOLT() {
BC = cantFail(BinaryContext::createBinaryContext(
- ObjFile.get(), true, DWARFContext::create(*ObjFile.get()),
- {llvm::outs(), llvm::errs()}));
+ ObjFile->makeTriple(), ObjFile->getFileName(), nullptr, true,
+ DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()}));
ASSERT_FALSE(!BC);
}
diff --git a/bolt/unittests/Core/MCPlusBuilder.cpp b/bolt/unittests/Core/MCPlusBuilder.cpp
index 63448039c53e67..240319aa71b866 100644
--- a/bolt/unittests/Core/MCPlusBuilder.cpp
+++ b/bolt/unittests/Core/MCPlusBuilder.cpp
@@ -50,8 +50,8 @@ struct MCPlusBuilderTester : public testing::TestWithParam<Triple::ArchType> {
void initializeBolt() {
BC = cantFail(BinaryContext::createBinaryContext(
- ObjFile.get(), true, DWARFContext::create(*ObjFile.get()),
- {llvm::outs(), llvm::errs()}));
+ ObjFile->makeTriple(), ObjFile->getFileName(), nullptr, true,
+ DWARFContext::create(*ObjFile.get()), {llvm::outs(), llvm::errs()}));
ASSERT_FALSE(!BC);
BC->initializeTarget(std::unique_ptr<MCPlusBuilder>(
createMCPlusBuilder(GetParam(), BC->MIA.get(), BC->MII.get(),
diff --git a/bolt/unittests/Rewrite/CMakeLists.txt b/bolt/unittests/Rewrite/CMakeLists.txt
new file mode 100644
index 00000000000000..89da98fb5f4cdd
--- /dev/null
+++ b/bolt/unittests/Rewrite/CMakeLists.txt
@@ -0,0 +1,27 @@
+set(LLVM_LINK_COMPONENTS
+ DebugInfoDWARF
+ Object
+ MC
+ ${LLVM_TARGETS_TO_BUILD}
+ )
+
+add_bolt_unittest(BOLTRewriteTests
+ JITRewriteInstance.cpp
+
+ DISABLE_LLVM_LINK_LLVM_DYLIB
+ )
+
+target_link_libraries(BOLTRewriteTests
+ PRIVATE
+ LLVMBOLTCore
+ LLVMBOLTRewrite
+ )
+
+foreach (tgt ${BOLT_TARGETS_TO_BUILD})
+ include_directories(
+ ${LLVM_MAIN_SRC_DIR}/lib/Target/${tgt}
+ ${LLVM_BINARY_DIR}/lib/Target/${tgt}
+ )
+ string(TOUPPER "${tgt}" upper)
+ target_compile_definitions(BOLTRewriteTests PRIVATE "${upper}_AVAILABLE")
+endforeach()
diff --git a/bolt/unittests/Rewrite/JITRewriteInstance.cpp b/bolt/unittests/Rewrite/JITRewriteInstance.cpp
new file mode 100644
index 00000000000000..185b770769c3c9
--- /dev/null
+++ b/bolt/unittests/Rewrite/JITRewriteInstance.cpp
@@ -0,0 +1,99 @@
+#include "bolt/Rewrite/JITRewriteInstance.h"
+#include "bolt/Core/BinaryContext.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::object;
+using namespace llvm::ELF;
+using namespace bolt;
+
+namespace {
+/// Test fixture that initializes all LLVM target components and then creates
+/// a non-PIC JITRewriteInstance logging to stdout/stderr before each test.
+struct JITRewriteInstanceTester
+    : public testing::TestWithParam<Triple::ArchType> {
+  void SetUp() override {
+    initalizeLLVM();
+    initializeBOLT();
+  }
+
+protected:
+  /// Register every target's info, MC layer, parsers, disassemblers, code
+  /// generators and printers with LLVM.
+  void initalizeLLVM() {
+    llvm::InitializeAllTargetInfos();
+    llvm::InitializeAllTargetMCs();
+    llvm::InitializeAllAsmParsers();
+    llvm::InitializeAllDisassemblers();
+    llvm::InitializeAllTargets();
+    llvm::InitializeAllAsmPrinters();
+  }
+
+  /// Create the instance under test; creation failure aborts via cantFail.
+  void initializeBOLT() {
+    auto InstanceOrErr = bolt::JITRewriteInstance::createJITRewriteInstance(
+        {llvm::outs(), llvm::errs()}, /*IsPIC*/ false);
+    BOLTJIT = cantFail(std::move(InstanceOrErr));
+    ASSERT_TRUE(BOLTJIT != nullptr);
+  }
+
+  std::unique_ptr<JITRewriteInstance> BOLTJIT;
+};
+} // namespace
+
+// The test both executes hand-written x86-64 assembly and asks BOLT to
+// disassemble this very process, so it needs an x86-64 host in addition to
+// BOLT's X86 backend being built.
+#if defined(X86_AVAILABLE) && defined(__x86_64__)
+
+// Recursive Fibonacci written as a naked function so that its machine code,
+// and therefore its size in bytes, is exactly what is spelled out below.
+// clang-format off
+extern "C" __attribute((naked)) int fib(int n)
+{
+  __asm__ __volatile__(
+    "pushq   %%r14\n"
+    "pushq   %%rbx\n"
+    "pushq   %%rax\n"
+    "movl    %%edi, %%r14d\n"
+    "xorl    %%ebx, %%ebx\n"
+    "cmpl    $0x2, %%edi\n"
+    "jge     .Ltmp0\n"
+    "movl    %%r14d, %%ecx\n"
+    "jmp     .Ltmp1\n"
+    ".Ltmp0:\n"
+    "xorl    %%ebx, %%ebx\n"
+    "nopw    %%cs:(%%rax,%%rax)\n"
+    ".Ltmp2:\n"
+    "leal    -0x1(%%r14), %%edi\n"
+    "callq   fib\n"
+    "leal    -0x2(%%r14), %%ecx\n"
+    "addl    %%eax, %%ebx\n"
+    "cmpl    $0x3, %%r14d\n"
+    "movl    %%ecx, %%r14d\n"
+    "ja      .Ltmp2\n"
+    ".Ltmp1:\n"
+    "addl    %%ecx, %%ebx\n"
+    "movl    %%ebx, %%eax\n"
+    "addq    $0x8, %%rsp\n"
+    "popq    %%rbx\n"
+    "popq    %%r14\n"
+    "retq\n"
+    :::);
+}
+// clang-format on
+
+INSTANTIATE_TEST_SUITE_P(X86, JITRewriteInstanceTester,
+                         ::testing::Values(Triple::x86_64));
+
+TEST_P(JITRewriteInstanceTester, DisassembleFib) {
+  EXPECT_EQ(fib(7), 13);
+
+  // BOLT JIT test/example
+  // Analyze fib function in this binary
+  // Size in bytes of the encoded body of fib above.
+  constexpr size_t FibSizeInBytes = 63;
+  uint64_t Address = reinterpret_cast<uint64_t>(&fib);
+  StringRef Data =
+      StringRef(reinterpret_cast<const char *>(&fib), FibSizeInBytes);
+
+  BOLTJIT->registerJITSection(StringRef(".text.example"), Address, Data, 1,
+                              ELF::SHT_PROGBITS,
+                              ELF::SHF_ALLOC | ELF::SHF_EXECINSTR);
+  BOLTJIT->registerJITFunction(StringRef("fib"), Address, FibSizeInBytes);
+  ASSERT_FALSE(BOLTJIT->run());
+
+  // Print to screen
+  BOLTJIT->printAll(outs());
+}
+
+#endif
More information about the llvm-branch-commits
mailing list