[llvm] 88e3358 - [ORC][JITLink] Non-debuginfo JITLink perf jitdump support.
Lang Hames via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 17 21:10:35 PDT 2023
Author: Prem Chintalapudi
Date: 2023-09-18T04:10:29Z
New Revision: 88e3358f331d727e7bbcddb2821ef89d25d1ab7a
URL: https://github.com/llvm/llvm-project/commit/88e3358f331d727e7bbcddb2821ef89d25d1ab7a
DIFF: https://github.com/llvm/llvm-project/commit/88e3358f331d727e7bbcddb2821ef89d25d1ab7a.diff
LOG: [ORC][JITLink] Non-debuginfo JITLink perf jitdump support.
This patch ports PerfJITEventListener to a JITLink plugin, but adds unwind
record support and drops debuginfo support temporarily. Debuginfo can be
enabled in the future by providing a way to obtain a DWARFContext from a
LinkGraph.
Reviewed By: lhames
Differential Revision: https://reviews.llvm.org/D146169
Added:
llvm/include/llvm/ExecutionEngine/Orc/PerfSupportPlugin.h
llvm/include/llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h
llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h
llvm/lib/ExecutionEngine/Orc/PerfSupportPlugin.cpp
llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
llvm/test/ExecutionEngine/JITLink/x86-64/ELF_perf.s
Modified:
llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
llvm/tools/llvm-jitlink/llvm-jitlink.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/PerfSupportPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/PerfSupportPlugin.h
new file mode 100644
index 000000000000000..c663377b17b652b
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/PerfSupportPlugin.h
@@ -0,0 +1,65 @@
+//===----- PerfSupportPlugin.h ----- Utils for perf support -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Handles support for registering code with perf
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_PERFSUPPORTPLUGIN_H
+#define LLVM_EXECUTIONENGINE_ORC_PERFSUPPORTPLUGIN_H
+
+#include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h"
+
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+
+namespace llvm {
+namespace orc {
+
+/// Log perf jitdump events for each object (see
+/// https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt).
+/// Currently has support for dumping code load records and unwind info records.
+class PerfSupportPlugin : public ObjectLinkingLayer::Plugin {
+public:
+ PerfSupportPlugin(ExecutorProcessControl &EPC,
+ ExecutorAddr RegisterPerfStartAddr,
+ ExecutorAddr RegisterPerfEndAddr,
+ ExecutorAddr RegisterPerfImplAddr, bool EmitUnwindInfo);
+ ~PerfSupportPlugin();
+ /// Queues a post-fixup pass that adds an allocation action carrying the graph's perf records.
+ void modifyPassConfig(MaterializationResponsibility &MR,
+ jitlink::LinkGraph &G,
+ jitlink::PassConfiguration &Config) override;
+ /// The remaining notifications are no-ops for this plugin.
+ Error notifyFailed(MaterializationResponsibility &MR) override {
+ return Error::success();
+ }
+
+ Error notifyRemovingResources(JITDylib &JD, ResourceKey K) override {
+ return Error::success();
+ }
+
+ void notifyTransferringResources(JITDylib &JD, ResourceKey DstKey,
+ ResourceKey SrcKey) override {}
+ /// Looks up the three registration functions in JD; fails for non-ELF targets.
+ static Expected<std::unique_ptr<PerfSupportPlugin>>
+ Create(ExecutorProcessControl &EPC, JITDylib &JD, bool EmitUnwindInfo);
+
+private:
+ ExecutorProcessControl &EPC;
+ ExecutorAddr RegisterPerfStartAddr; // called from the constructor
+ ExecutorAddr RegisterPerfEndAddr; // called from the destructor
+ ExecutorAddr RegisterPerfImplAddr; // target of the per-graph allocation action
+ std::atomic<uint64_t> CodeIndex; // NOTE(review): <atomic> is not included here directly - confirm it arrives transitively
+ bool EmitUnwindInfo; // whether each batch should carry an unwind record
+};
+
+} // namespace orc
+} // namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_PERFSUPPORTPLUGIN_H
\ No newline at end of file
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h
new file mode 100644
index 000000000000000..f4788bcebc3cf97
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h
@@ -0,0 +1,233 @@
+//===--- PerfSharedStructs.h --- RPC Structs for perf support ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Structs and serialization to share perf-related information
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_PERFSHAREDSTRUCTS_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_PERFSHAREDSTRUCTS_H
+
+#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h"
+
+namespace llvm {
+
+namespace orc {
+
+// The following are POD struct definitions from the perf jit specification
+
+enum class PerfJITRecordType {
+ JIT_CODE_LOAD = 0,
+ JIT_CODE_MOVE = 1, // not emitted, code isn't moved
+ JIT_CODE_DEBUG_INFO = 2, // not emitted yet, needs a DWARFContext
+ JIT_CODE_CLOSE = 3, // not emitted, unnecessary
+ JIT_CODE_UNWINDING_INFO = 4, // emitted only when unwind info is requested
+
+ JIT_CODE_MAX
+};
+
+struct PerfJITRecordPrefix {
+ PerfJITRecordType Id; // record type identifier, serialized as uint32_t
+ uint32_t TotalSize; // byte size of the whole record as written to the jitdump file
+};
+struct PerfJITCodeLoadRecord {
+ PerfJITRecordPrefix Prefix;
+ // Pid and Tid are placeholders here; the executor writes its own values.
+ uint32_t Pid;
+ uint32_t Tid;
+ uint64_t Vma; // set to the same address as CodeAddr by the plugin
+ uint64_t CodeAddr; // executor address the code bytes are read from
+ uint64_t CodeSize; // number of code bytes
+ uint64_t CodeIndex; // taken from the plugin's running counter
+ std::string Name; // symbol name
+};
+
+struct PerfJITDebugEntry {
+ uint64_t Addr; // line-table address, already biased by 0x40 by the plugin
+ uint32_t Lineno; // source line number starting at 1
+ uint32_t Discrim; // column discriminator, 0 is default
+ std::string Name; // source file name
+};
+
+struct PerfJITDebugInfoRecord {
+ PerfJITRecordPrefix Prefix;
+ // Address of the symbol that the line entries belong to.
+ uint64_t CodeAddr;
+ std::vector<PerfJITDebugEntry> Entries;
+};
+
+struct PerfJITCodeUnwindingInfoRecord {
+ PerfJITRecordPrefix Prefix; // TotalSize == 0 means there is no unwind record
+
+ uint64_t UnwindDataSize; // EHFrameHdrSize plus the size of .eh_frame
+ uint64_t EHFrameHdrSize;
+ uint64_t MappedSize; // 0 when the header is synthesized, else UnwindDataSize
+ // Used like a union: either EHFrameHdrAddr is nonzero or EHFrameHdr has data
+ uint64_t EHFrameHdrAddr;
+ std::string EHFrameHdr;
+
+ uint64_t EHFrameAddr;
+ // .eh_frame size is UnwindDataSize - EHFrameHdrSize
+};
+
+// Batch vehicle for minimizing RPC calls for perf jit records
+struct PerfJITRecordBatch {
+ std::vector<PerfJITDebugInfoRecord> DebugInfoRecords; // serialized after CodeLoadRecords
+ std::vector<PerfJITCodeLoadRecord> CodeLoadRecords;
+ // only valid if UnwindingRecord.Prefix.TotalSize > 0
+ PerfJITCodeUnwindingInfoRecord UnwindingRecord;
+};
+
+// SPS traits for Records
+
+namespace shared {
+
+using SPSPerfJITRecordPrefix = SPSTuple<uint32_t, uint32_t>;
+// Wire format of PerfJITRecordPrefix: Id (cast to uint32_t), then TotalSize.
+template <>
+class SPSSerializationTraits<SPSPerfJITRecordPrefix, PerfJITRecordPrefix> {
+public:
+ static size_t size(const PerfJITRecordPrefix &Val) {
+ return SPSPerfJITRecordPrefix::AsArgList::size(
+ static_cast<uint32_t>(Val.Id), Val.TotalSize);
+ }
+ static bool deserialize(SPSInputBuffer &IB, PerfJITRecordPrefix &Val) {
+ uint32_t Id;
+ if (!SPSPerfJITRecordPrefix::AsArgList::deserialize(IB, Id, Val.TotalSize))
+ return false;
+ Val.Id = static_cast<PerfJITRecordType>(Id);
+ return true;
+ }
+ static bool serialize(SPSOutputBuffer &OB, const PerfJITRecordPrefix &Val) {
+ return SPSPerfJITRecordPrefix::AsArgList::serialize(
+ OB, static_cast<uint32_t>(Val.Id), Val.TotalSize);
+ }
+};
+
+using SPSPerfJITCodeLoadRecord =
+ SPSTuple<SPSPerfJITRecordPrefix, uint32_t, uint32_t, uint64_t, uint64_t,
+ uint64_t, uint64_t, SPSString>;
+// Wire format of PerfJITCodeLoadRecord: every field, in declaration order.
+template <>
+class SPSSerializationTraits<SPSPerfJITCodeLoadRecord, PerfJITCodeLoadRecord> {
+public:
+ static size_t size(const PerfJITCodeLoadRecord &Val) {
+ return SPSPerfJITCodeLoadRecord::AsArgList::size(
+ Val.Prefix, Val.Pid, Val.Tid, Val.Vma, Val.CodeAddr, Val.CodeSize,
+ Val.CodeIndex, Val.Name);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, PerfJITCodeLoadRecord &Val) {
+ return SPSPerfJITCodeLoadRecord::AsArgList::deserialize(
+ IB, Val.Prefix, Val.Pid, Val.Tid, Val.Vma, Val.CodeAddr, Val.CodeSize,
+ Val.CodeIndex, Val.Name);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB, const PerfJITCodeLoadRecord &Val) {
+ return SPSPerfJITCodeLoadRecord::AsArgList::serialize(
+ OB, Val.Prefix, Val.Pid, Val.Tid, Val.Vma, Val.CodeAddr, Val.CodeSize,
+ Val.CodeIndex, Val.Name);
+ }
+};
+
+using SPSPerfJITDebugEntry = SPSTuple<uint64_t, uint32_t, uint32_t, SPSString>;
+// Wire format of PerfJITDebugEntry: Addr, Lineno, Discrim, Name.
+template <>
+class SPSSerializationTraits<SPSPerfJITDebugEntry, PerfJITDebugEntry> {
+public:
+ static size_t size(const PerfJITDebugEntry &Val) {
+ return SPSPerfJITDebugEntry::AsArgList::size(Val.Addr, Val.Lineno,
+ Val.Discrim, Val.Name);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, PerfJITDebugEntry &Val) {
+ return SPSPerfJITDebugEntry::AsArgList::deserialize(
+ IB, Val.Addr, Val.Lineno, Val.Discrim, Val.Name);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB, const PerfJITDebugEntry &Val) {
+ return SPSPerfJITDebugEntry::AsArgList::serialize(OB, Val.Addr, Val.Lineno,
+ Val.Discrim, Val.Name);
+ }
+};
+
+using SPSPerfJITDebugInfoRecord = SPSTuple<SPSPerfJITRecordPrefix, uint64_t,
+ SPSSequence<SPSPerfJITDebugEntry>>;
+// Wire format of PerfJITDebugInfoRecord: Prefix, CodeAddr, Entries.
+template <>
+class SPSSerializationTraits<SPSPerfJITDebugInfoRecord,
+ PerfJITDebugInfoRecord> {
+public:
+ static size_t size(const PerfJITDebugInfoRecord &Val) {
+ return SPSPerfJITDebugInfoRecord::AsArgList::size(Val.Prefix, Val.CodeAddr,
+ Val.Entries);
+ }
+ static bool deserialize(SPSInputBuffer &IB, PerfJITDebugInfoRecord &Val) {
+ return SPSPerfJITDebugInfoRecord::AsArgList::deserialize(
+ IB, Val.Prefix, Val.CodeAddr, Val.Entries);
+ }
+ static bool serialize(SPSOutputBuffer &OB,
+ const PerfJITDebugInfoRecord &Val) {
+ return SPSPerfJITDebugInfoRecord::AsArgList::serialize(
+ OB, Val.Prefix, Val.CodeAddr, Val.Entries);
+ }
+};
+
+using SPSPerfJITCodeUnwindingInfoRecord =
+ SPSTuple<SPSPerfJITRecordPrefix, uint64_t, uint64_t, uint64_t, uint64_t,
+ SPSString, uint64_t>; // every field of the record, in declaration order
+template <>
+class SPSSerializationTraits<SPSPerfJITCodeUnwindingInfoRecord,
+ PerfJITCodeUnwindingInfoRecord> {
+public:
+ static size_t size(const PerfJITCodeUnwindingInfoRecord &Val) {
+ return SPSPerfJITCodeUnwindingInfoRecord::AsArgList::size(
+ Val.Prefix, Val.UnwindDataSize, Val.EHFrameHdrSize, Val.MappedSize,
+ Val.EHFrameHdrAddr, Val.EHFrameHdr, Val.EHFrameAddr);
+ }
+ static bool deserialize(SPSInputBuffer &IB,
+ PerfJITCodeUnwindingInfoRecord &Val) {
+ return SPSPerfJITCodeUnwindingInfoRecord::AsArgList::deserialize(
+ IB, Val.Prefix, Val.UnwindDataSize, Val.EHFrameHdrSize, Val.MappedSize,
+ Val.EHFrameHdrAddr, Val.EHFrameHdr, Val.EHFrameAddr);
+ }
+ static bool serialize(SPSOutputBuffer &OB,
+ const PerfJITCodeUnwindingInfoRecord &Val) {
+ return SPSPerfJITCodeUnwindingInfoRecord::AsArgList::serialize(
+ OB, Val.Prefix, Val.UnwindDataSize, Val.EHFrameHdrSize, Val.MappedSize,
+ Val.EHFrameHdrAddr, Val.EHFrameHdr, Val.EHFrameAddr);
+ }
+};
+
+using SPSPerfJITRecordBatch = SPSTuple<SPSSequence<SPSPerfJITCodeLoadRecord>,
+ SPSSequence<SPSPerfJITDebugInfoRecord>,
+ SPSPerfJITCodeUnwindingInfoRecord>; // CodeLoadRecords go first, unlike the struct's member order
+template <>
+class SPSSerializationTraits<SPSPerfJITRecordBatch, PerfJITRecordBatch> {
+public:
+ static size_t size(const PerfJITRecordBatch &Val) {
+ return SPSPerfJITRecordBatch::AsArgList::size(
+ Val.CodeLoadRecords, Val.DebugInfoRecords, Val.UnwindingRecord);
+ }
+ static bool deserialize(SPSInputBuffer &IB, PerfJITRecordBatch &Val) {
+ return SPSPerfJITRecordBatch::AsArgList::deserialize(
+ IB, Val.CodeLoadRecords, Val.DebugInfoRecords, Val.UnwindingRecord);
+ }
+ static bool serialize(SPSOutputBuffer &OB, const PerfJITRecordBatch &Val) {
+ return SPSPerfJITRecordBatch::AsArgList::serialize(
+ OB, Val.CodeLoadRecords, Val.DebugInfoRecords, Val.UnwindingRecord);
+ }
+};
+
+} // namespace shared
+
+} // namespace orc
+
+} // namespace llvm
+
+#endif
\ No newline at end of file
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h
new file mode 100644
index 000000000000000..1d8e33f8013b9f8
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h
@@ -0,0 +1,28 @@
+//===------- JITLoaderPerf.h --- Register profiler objects ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Register objects for access by profilers via the perf JIT interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERPERF_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERPERF_H
+
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+#include <cstdint>
+
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size); // receives a serialized PerfJITRecordBatch from the plugin
+// Invoked once when a PerfSupportPlugin is constructed.
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size);
+// Invoked once when a PerfSupportPlugin is destroyed.
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size);
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERPERF_H
\ No newline at end of file
diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
index c15c2eac0d044d2..3256ed8b7362c66 100644
--- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
@@ -41,6 +41,7 @@ add_llvm_component_library(LLVMOrcJIT
ObjectTransformLayer.cpp
OrcABISupport.cpp
OrcV2CBindings.cpp
+ PerfSupportPlugin.cpp
RTDyldObjectLinkingLayer.cpp
SimpleRemoteEPC.cpp
Speculation.cpp
diff --git a/llvm/lib/ExecutionEngine/Orc/PerfSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/PerfSupportPlugin.cpp
new file mode 100644
index 000000000000000..fd7acbd4446c524
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/PerfSupportPlugin.cpp
@@ -0,0 +1,297 @@
+//===----- PerfSupportPlugin.cpp --- Utils for perf support -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Handles support for registering code with perf
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/PerfSupportPlugin.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/ExecutionEngine/JITLink/x86_64.h"
+#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+using namespace llvm::orc;
+using namespace llvm::jitlink;
+
+namespace {
+
+// Builds a minimal .eh_frame_hdr: version 1, no FDE count, no search table.
+// If `absolute`, it carries the 8-byte absolute address of EHFrame; otherwise
+// a zeroed 4-byte datarel slot is written, meant to be patched by a relocation.
+Expected<std::string> createX64EHFrameHeader(Section &EHFrame,
+ support::endianness endianness,
+ bool absolute) {
+ uint8_t Version = 1;
+ uint8_t EhFramePtrEnc = 0;
+ if (absolute) {
+ EhFramePtrEnc |= dwarf::DW_EH_PE_sdata8 | dwarf::DW_EH_PE_absptr;
+ } else {
+ EhFramePtrEnc |= dwarf::DW_EH_PE_sdata4 | dwarf::DW_EH_PE_datarel;
+ }
+ uint8_t FDECountEnc = dwarf::DW_EH_PE_omit;
+ uint8_t TableEnc = dwarf::DW_EH_PE_omit;
+ // Zero placeholder for a relocation to the start of the .eh_frame section
+ uint32_t EHFrameRelocation = 0;
+ // uint32_t FDECount = 0;
+ // Skip the FDE binary search table
+ // We'd have to reprocess the CIEs to get this information,
+ // which seems like more trouble than it's worth
+ // TODO consider implementing this.
+ // binary search table goes here
+
+ size_t HeaderSize =
+ (sizeof(Version) + sizeof(EhFramePtrEnc) + sizeof(FDECountEnc) +
+ sizeof(TableEnc) +
+ (absolute ? sizeof(uint64_t) : sizeof(EHFrameRelocation)));
+ std::string HeaderContent(HeaderSize, '\0');
+ BinaryStreamWriter Writer(
+ MutableArrayRef<uint8_t>(
+ reinterpret_cast<uint8_t *>(HeaderContent.data()), HeaderSize),
+ endianness);
+ if (auto Err = Writer.writeInteger(Version))
+ return std::move(Err);
+ if (auto Err = Writer.writeInteger(EhFramePtrEnc))
+ return std::move(Err);
+ if (auto Err = Writer.writeInteger(FDECountEnc))
+ return std::move(Err);
+ if (auto Err = Writer.writeInteger(TableEnc))
+ return std::move(Err);
+ if (absolute) {
+ uint64_t EHFrameAddr = SectionRange(EHFrame).getStart().getValue();
+ if (auto Err = Writer.writeInteger(EHFrameAddr))
+ return std::move(Err);
+ } else {
+ if (auto Err = Writer.writeInteger(EHFrameRelocation))
+ return std::move(Err);
+ }
+ return HeaderContent;
+}
+
+constexpr StringRef RegisterPerfStartSymbolName =
+ "llvm_orc_registerJITLoaderPerfStart"; // called once from the plugin constructor
+constexpr StringRef RegisterPerfEndSymbolName =
+ "llvm_orc_registerJITLoaderPerfEnd"; // called once from the plugin destructor
+constexpr StringRef RegisterPerfImplSymbolName =
+ "llvm_orc_registerJITLoaderPerfImpl"; // run as an allocation action for each linked graph
+
+static PerfJITCodeLoadRecord
+getCodeLoadRecord(const Symbol &Sym, std::atomic<uint64_t> &CodeIndex) {
+ // Builds the JIT_CODE_LOAD record for Sym, consuming one value of CodeIndex.
+ const auto SymName = Sym.getName();
+ const uint64_t SymAddr = Sym.getAddress().getValue();
+ PerfJITCodeLoadRecord Rec;
+ Rec.Prefix.Id = PerfJITRecordType::JIT_CODE_LOAD;
+ Rec.Pid = 0; // Runtime sets PID
+ Rec.Tid = 0; // Runtime sets TID
+ Rec.Vma = SymAddr;
+ Rec.CodeAddr = SymAddr;
+ Rec.CodeSize = Sym.getSize();
+ Rec.CodeIndex = CodeIndex++;
+ Rec.Name = SymName.str();
+ // TotalSize covers the on-disk record, so it is computed last, once the
+ // name and the code size are known.
+ const size_t FixedBytes =
+ 2 * sizeof(uint32_t) // id, total_size
+ + sizeof(uint64_t) // timestamp
+ + 2 * sizeof(uint32_t) // pid, tid
+ + 4 * sizeof(uint64_t); // vma, code_addr, code_size, code_index
+ Rec.Prefix.TotalSize = FixedBytes
+ + SymName.size() + 1 // symbol name plus its NUL terminator
+ + Rec.CodeSize; // code
+
+ return Rec;
+}
+
+static std::optional<PerfJITDebugInfoRecord>
+getDebugInfoRecord(const Symbol &Sym, DWARFContext *DC) {
+ // Returns the JIT_CODE_DEBUG_INFO record for Sym, or nullopt when there is
+ // no DWARF context or no line info for the symbol's address range.
+ if (DC == nullptr) {
+ LLVM_DEBUG(dbgs() << "No debug info available\n");
+ return std::nullopt;
+ }
+
+ const auto SymAddr = Sym.getAddress();
+ const auto SymSize = Sym.getSize();
+ auto &Sec = Sym.getBlock().getSection();
+ LLVM_DEBUG(dbgs() << "Getting debug info for symbol " << Sym.getName()
+ << " at address " << SymAddr.getValue() << " with size "
+ << SymSize << "\n"
+ << "Section ordinal: " << Sec.getOrdinal() << "\n");
+
+ const auto Lines = DC->getLineInfoForAddressRange(
+ object::SectionedAddress{SymAddr.getValue(), Sec.getOrdinal()}, SymSize,
+ DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
+ if (Lines.empty()) {
+ // No line info available
+ LLVM_DEBUG(dbgs() << "No line info available\n");
+ return std::nullopt;
+ }
+
+ PerfJITDebugInfoRecord Rec;
+ Rec.Prefix.Id = PerfJITRecordType::JIT_CODE_DEBUG_INFO;
+ Rec.CodeAddr = SymAddr.getValue();
+
+ // Fixed part of the record: the record header (2 x uint64_t) plus the
+ // record fields (2 x uint64_t). Per-entry sizes are added in the loop.
+ size_t TotalBytes = (2 + 2) * sizeof(uint64_t);
+ for (const auto &[LineAddr, Info] : Lines) {
+ // The function re-created by perf is preceded by an ELF header, so each
+ // address is shifted by 0x40; otherwise the results are
+ // wrong.
+ Rec.Entries.push_back({LineAddr + 0x40, Info.Line, Info.Discriminator,
+ Info.FileName});
+ // Addr, Line/Discrim, then the NUL-terminated Name
+ TotalBytes += sizeof(uint64_t) + 2 * sizeof(uint32_t);
+ TotalBytes += Rec.Entries.back().Name.size() + 1;
+ }
+ Rec.Prefix.TotalSize = TotalBytes;
+ LLVM_DEBUG(dbgs() << "Created debug info record\n"
+ << "Total size: " << Rec.Prefix.TotalSize << "\n"
+ << "Nr entries: " << Rec.Entries.size() << "\n");
+ return Rec;
+}
+
+static Expected<PerfJITCodeUnwindingInfoRecord>
+getUnwindingRecord(LinkGraph &G) {
+ // Builds the JIT_CODE_UNWINDING_INFO record for G. Prefix.TotalSize == 0 in
+ // the result means "nothing to emit"; the record is value-initialized so its
+ // other numeric fields are zero, not indeterminate, when it gets serialized.
+ PerfJITCodeUnwindingInfoRecord Record{};
+ Record.Prefix.Id = PerfJITRecordType::JIT_CODE_UNWINDING_INFO;
+ auto Eh_frame = G.findSectionByName(".eh_frame");
+ if (!Eh_frame) {
+ LLVM_DEBUG(dbgs() << "No .eh_frame section found\n");
+ return Record;
+ }
+ if (!G.getTargetTriple().isOSBinFormatELF()) {
+ LLVM_DEBUG(dbgs() << "Not an ELF file, will not emit unwinding info\n");
+ return Record;
+ }
+ auto SR = SectionRange(*Eh_frame);
+ auto EHFrameSize = SR.getSize();
+ auto Eh_frame_hdr = G.findSectionByName(".eh_frame_hdr");
+ if (!Eh_frame_hdr) {
+ // No header in the graph: synthesize one, which is only done for x86-64.
+ if (G.getTargetTriple().getArch() != Triple::x86_64) {
+ LLVM_DEBUG(dbgs() << "No .eh_frame_hdr section found\n");
+ return Record;
+ }
+ auto Hdr = createX64EHFrameHeader(*Eh_frame, G.getEndianness(), true);
+ if (!Hdr)
+ return Hdr.takeError();
+ Record.EHFrameHdr = std::move(*Hdr);
+ Record.EHFrameHdrAddr = 0;
+ Record.EHFrameHdrSize = Record.EHFrameHdr.size();
+ Record.MappedSize = 0; // Because the EHFrame header was not mapped
+ } else {
+ auto HdrSR = SectionRange(*Eh_frame_hdr);
+ Record.EHFrameHdrAddr = HdrSR.getStart().getValue();
+ Record.EHFrameHdrSize = HdrSR.getSize();
+ Record.MappedSize = EHFrameSize + Record.EHFrameHdrSize;
+ }
+ Record.UnwindDataSize = EHFrameSize + Record.EHFrameHdrSize;
+ Record.EHFrameAddr = SR.getStart().getValue();
+ Record.Prefix.TotalSize =
+ (2 * sizeof(uint32_t) // id, total_size
+ + sizeof(uint64_t) // timestamp
+ + 3 * sizeof(uint64_t) // unwind_data_size, eh_frame_hdr_size, mapped_size
+ + Record.UnwindDataSize // eh_frame_hdr, eh_frame
+ );
+ LLVM_DEBUG(dbgs() << "Created unwind record\n"
+ << "Total size: " << Record.Prefix.TotalSize << "\n"
+ << "Unwind size: " << Record.UnwindDataSize << "\n"
+ << "EHFrame size: " << EHFrameSize << "\n"
+ << "EHFrameHdr size: " << Record.EHFrameHdrSize << "\n");
+ return Record;
+}
+
+static PerfJITRecordBatch getRecords(ExecutionSession &ES, LinkGraph &G,
+ DWARFContext *DC,
+ std::atomic<uint64_t> &CodeIndex,
+ bool EmitUnwindInfo) {
+ // Gathers the perf records for G: a code-load record (plus a debug-info record
+ // when one exists) per named callable symbol, and optionally an unwind record.
+ PerfJITRecordBatch Batch;
+ // TotalSize == 0 marks the unwind record as absent. Set it up front so that
+ // it also holds when getUnwindingRecord fails and only an error is reported.
+ Batch.UnwindingRecord.Prefix.TotalSize = 0;
+ for (auto Sym : G.defined_symbols()) {
+ if (!Sym->hasName() || !Sym->isCallable())
+ continue;
+ if (auto DebugInfo = getDebugInfoRecord(*Sym, DC))
+ Batch.DebugInfoRecords.push_back(std::move(*DebugInfo));
+ Batch.CodeLoadRecords.push_back(getCodeLoadRecord(*Sym, CodeIndex));
+ }
+ if (EmitUnwindInfo) {
+ if (auto UWR = getUnwindingRecord(G))
+ Batch.UnwindingRecord = std::move(*UWR);
+ else
+ ES.reportError(UWR.takeError());
+ }
+ return Batch;
+}
+} // namespace
+
+PerfSupportPlugin::PerfSupportPlugin(ExecutorProcessControl &EPC,
+ ExecutorAddr RegisterPerfStartAddr,
+ ExecutorAddr RegisterPerfEndAddr,
+ ExecutorAddr RegisterPerfImplAddr,
+ bool EmitUnwindInfo)
+ : EPC(EPC), RegisterPerfStartAddr(RegisterPerfStartAddr),
+ RegisterPerfEndAddr(RegisterPerfEndAddr),
+ RegisterPerfImplAddr(RegisterPerfImplAddr), CodeIndex(0),
+ EmitUnwindInfo(EmitUnwindInfo) {
+ cantFail(EPC.callSPSWrapper<void()>(RegisterPerfStartAddr)); // runs the start hook in the executor; cantFail aborts if the call returns an error
+}
+PerfSupportPlugin::~PerfSupportPlugin() {
+ cantFail(EPC.callSPSWrapper<void()>(RegisterPerfEndAddr)); // runs the end hook in the executor; cantFail aborts if the call returns an error
+}
+
+void PerfSupportPlugin::modifyPassConfig(MaterializationResponsibility &MR,
+ LinkGraph &G,
+ PassConfiguration &Config) {
+ Config.PostFixupPasses.push_back([this](LinkGraph &G) { // this G shadows the outer parameter
+ // TODO: obtain a real DWARFContext; with a null one no debug-info records are produced
+ DWARFContext *DWC = nullptr;
+ auto Batch = getRecords(EPC.getExecutionSession(), G, DWC, CodeIndex,
+ EmitUnwindInfo);
+ G.allocActions().push_back( // first element calls the Impl wrapper with the serialized batch
+ {cantFail(shared::WrapperFunctionCall::Create<
+ shared::SPSArgList<shared::SPSPerfJITRecordBatch>>(
+ RegisterPerfImplAddr, Batch)),
+ {}}); // second element of the pair is left empty
+ return Error::success();
+ });
+}
+
+Expected<std::unique_ptr<PerfSupportPlugin>>
+PerfSupportPlugin::Create(ExecutorProcessControl &EPC, JITDylib &JD,
+ bool EmitUnwindInfo) {
+ // Rejects non-ELF targets, then resolves the three registration symbols in JD.
+ if (!EPC.getTargetTriple().isOSBinFormatELF())
+ return make_error<StringError>(
+ "Perf support only available for ELF LinkGraphs!",
+ inconvertibleErrorCode());
+ ExecutionSession &Session = EPC.getExecutionSession();
+ ExecutorAddr PerfStart, PerfEnd, PerfImpl;
+ if (auto LookupErr = lookupAndRecordAddrs(
+ Session, LookupKind::Static, makeJITDylibSearchOrder({&JD}),
+ {{Session.intern(RegisterPerfStartSymbolName), &PerfStart},
+ {Session.intern(RegisterPerfEndSymbolName), &PerfEnd},
+ {Session.intern(RegisterPerfImplSymbolName), &PerfImpl}}))
+ return std::move(LookupErr);
+ return std::make_unique<PerfSupportPlugin>(EPC, PerfStart, PerfEnd, PerfImpl,
+ EmitUnwindInfo);
+}
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
index d9cd7b6dad98896..f2005dc1775e3c8 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
@@ -5,6 +5,7 @@ endif()
add_llvm_component_library(LLVMOrcTargetProcess
ExecutorSharedMemoryMapperService.cpp
JITLoaderGDB.cpp
+ JITLoaderPerf.cpp
OrcRTBootstrap.cpp
RegisterEHFrames.cpp
SimpleExecutorDylibManager.cpp
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
new file mode 100644
index 000000000000000..731b4a173850d6a
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp
@@ -0,0 +1,457 @@
+//===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Register objects for access by profilers via the perf JIT interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h"
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Threading.h"
+
+#include <mutex>
+#include <optional>
+
+#ifdef __linux__
+
+#include <sys/mman.h> // mmap()
+#include <time.h> // clock_gettime(), time(), localtime_r() */
+#include <unistd.h> // for read(), close()
+
+#define DEBUG_TYPE "orc"
+
+// language identifier (XXX: should we generate something better from debug
+// info?)
+#define JIT_LANG "llvm-IR"
+#define LLVM_PERF_JIT_MAGIC \
+ ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \
+ (uint32_t)'D')
+#define LLVM_PERF_JIT_VERSION 1
+
+using namespace llvm;
+using namespace llvm::orc;
+
+struct PerfState {
+ // process id, cached for the jitdump header and the code load records
+ uint32_t Pid;
+
+ // base directory for output data
+ std::string JitPath;
+
+ // file descriptor of the jitdump file, closed via Dumpstream
+ int DumpFd = -1;
+
+ // output data stream
+ std::unique_ptr<raw_fd_ostream> Dumpstream;
+
+ // address of the perf mmap marker; null while nothing is mapped
+ void *MarkerAddr = nullptr;
+};
+
+// prevent concurrent dumps from messing up the output file
+static std::mutex Mutex;
+static std::optional<PerfState> State; // while empty, record registration fails with "PerfState not initialized"
+
+struct RecHeader { // common head of every record written to the dump stream
+ uint32_t Id; // record type, the numeric PerfJITRecordType value
+ uint32_t TotalSize; // copied from the record's Prefix.TotalSize
+ uint64_t Timestamp; // perf_get_timestamp() at write time
+};
+
+struct DIR { // fixed-size head of a debug-info record
+ RecHeader Prefix;
+ uint64_t CodeAddr;
+ uint64_t NrEntry; // number of DIE entries written after this head
+};
+
+struct DIE { // one debug entry; its NUL-terminated file name is written right after it
+ uint64_t CodeAddr;
+ uint32_t Line;
+ uint32_t Discrim;
+};
+
+struct CLR { // fixed-size head of a code-load record; name and code bytes follow it
+ RecHeader Prefix;
+ uint32_t Pid; // filled from PerfState::Pid
+ uint32_t Tid; // filled from get_threadid()
+ uint64_t Vma;
+ uint64_t CodeAddr;
+ uint64_t CodeSize;
+ uint64_t CodeIndex;
+};
+
+struct UWR { // fixed-size head of an unwind record; EH frame header and EH frame bytes follow it
+ RecHeader Prefix;
+ uint64_t UnwindDataSize;
+ uint64_t EhFrameHeaderSize;
+ uint64_t MappedSize;
+};
+
+static inline uint64_t timespec_to_ns(const struct timespec *TS) {
+ constexpr uint64_t NsPerSec = 1000000000; // nanoseconds in one second
+ return static_cast<uint64_t>(TS->tv_sec) * NsPerSec + TS->tv_nsec;
+}
+
+static inline uint64_t perf_get_timestamp() {
+ // Monotonic clock reading in nanoseconds, or 0 if the clock cannot be read.
+ timespec Now;
+ const bool Failed = clock_gettime(CLOCK_MONOTONIC, &Now) != 0;
+
+ return Failed ? 0 : timespec_to_ns(&Now);
+}
+
+static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) {
+ // Writes the fixed-size DIR head, then each entry followed by its file name.
+ assert(State && "PerfState not initialized");
+ LLVM_DEBUG(dbgs() << "Writing debug record with "
+ << DebugRecord.Entries.size() << " entries\n");
+ const DIR Head{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id),
+ DebugRecord.Prefix.TotalSize, perf_get_timestamp()},
+ DebugRecord.CodeAddr, DebugRecord.Entries.size()};
+ State->Dumpstream->write(reinterpret_cast<const char *>(&Head), sizeof(Head));
+ size_t Written = sizeof(Head);
+ for (const PerfJITDebugEntry &Entry : DebugRecord.Entries) {
+ const DIE Line{Entry.Addr, Entry.Lineno, Entry.Discrim};
+ State->Dumpstream->write(reinterpret_cast<const char *>(&Line), sizeof(Line));
+ State->Dumpstream->write(Entry.Name.data(), Entry.Name.size() + 1); // + NUL
+ Written += sizeof(Line) + Entry.Name.size() + 1;
+ }
+ LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n");
+}
+
+static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) {
+ // Writes the CLR head, the NUL-terminated name, then the code bytes, which
+ // are read directly from CodeAddr in this process.
+ assert(State && "PerfState not initialized");
+ const uint32_t ThreadId = get_threadid();
+ LLVM_DEBUG(dbgs() << "Writing code record with code size "
+ << CodeRecord.CodeSize << " and code index "
+ << CodeRecord.CodeIndex << "\n");
+
+ const RecHeader Prefix{static_cast<uint32_t>(CodeRecord.Prefix.Id),
+ CodeRecord.Prefix.TotalSize, perf_get_timestamp()};
+ const CLR Head{Prefix, State->Pid, ThreadId, CodeRecord.Vma,
+ CodeRecord.CodeAddr, CodeRecord.CodeSize, CodeRecord.CodeIndex};
+ LLVM_DEBUG(dbgs() << "wrote " << sizeof(Head) << " bytes of CLR, "
+ << CodeRecord.Name.size() + 1 << " bytes of name, "
+ << CodeRecord.CodeSize << " bytes of code\n");
+
+ State->Dumpstream->write(reinterpret_cast<const char *>(&Head), sizeof(Head));
+ State->Dumpstream->write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1);
+ State->Dumpstream->write(reinterpret_cast<const char *>(CodeRecord.CodeAddr),
+ CodeRecord.CodeSize);
+}
+
+static void
+writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) {
+ // Writes the UWR head, then the EH frame header (from memory if EHFrameHdrAddr
+ // is set, else from the record's bytes), then the EH frame at EHFrameAddr.
+ assert(State && "PerfState not initialized");
+ // Diagnostics go through LLVM_DEBUG so they are not printed unconditionally.
+ LLVM_DEBUG(dbgs() << "Writing unwind record with unwind data size "
+ << UnwindRecord.UnwindDataSize << " and EH frame header size "
+ << UnwindRecord.EHFrameHdrSize << " and mapped size "
+ << UnwindRecord.MappedSize << "\n");
+ const uint64_t EHFrameSize =
+ UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize;
+ UWR Uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id),
+ UnwindRecord.Prefix.TotalSize, perf_get_timestamp()},
+ UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize,
+ UnwindRecord.MappedSize};
+ LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, "
+ << UnwindRecord.EHFrameHdrSize << " bytes of EH frame header, "
+ << EHFrameSize << " bytes of EH frame\n");
+ State->Dumpstream->write(reinterpret_cast<const char *>(&Uwr), sizeof(Uwr));
+ const char *Hdr = UnwindRecord.EHFrameHdrAddr
+ ? reinterpret_cast<const char *>(UnwindRecord.EHFrameHdrAddr)
+ : UnwindRecord.EHFrameHdr.data();
+ State->Dumpstream->write(Hdr, UnwindRecord.EHFrameHdrSize);
+ State->Dumpstream->write(
+ reinterpret_cast<const char *>(UnwindRecord.EHFrameAddr), EHFrameSize);
+}
+
+static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) {
+ // Appends every record of Batch to the dump stream and flushes it.
+ if (!State.has_value())
+ return make_error<StringError>("PerfState not initialized",
+ inconvertibleErrorCode());
+
+ // Hold the mutex for the whole batch so concurrent dumps do not interleave.
+ std::lock_guard<std::mutex> Guard(Mutex);
+ const auto &Unwind = Batch.UnwindingRecord;
+ if (Unwind.Prefix.TotalSize != 0)
+ writeUnwindRecord(Unwind);
+
+ for (const PerfJITDebugInfoRecord &Rec : Batch.DebugInfoRecords)
+ writeDebugRecord(Rec);
+ for (const PerfJITCodeLoadRecord &Rec : Batch.CodeLoadRecords)
+ writeCodeRecord(Rec);
+
+ State->Dumpstream->flush();
+ return Error::success();
+}
+
+struct Header { // header of the jitdump file
+ uint32_t Magic; // characters "JiTD"
+ uint32_t Version; // header version
+ uint32_t TotalSize; // total size of header
+ uint32_t ElfMach; // elf mach target
+ uint32_t Pad1; // reserved
+ uint32_t Pid; // process id, copied from PerfState::Pid
+ uint64_t Timestamp; // timestamp
+ uint64_t Flags; // flags
+};
+
+ /// Map one page of the jitdump file (State.DumpFd) into this process and
+ /// remember the mapping in State.MarkerAddr.
+ ///
+ /// Returns a StringError when mmap fails; State.MarkerAddr then holds
+ /// MAP_FAILED.
+ static Error OpenMarker(PerfState &State) {
+   // The mapping exists only to make an MMAP RECORD show up in perf.data.
+   // perf sees it either live (perf record is running while we mmap) or, in
+   // deferred mode, through /proc/PID/maps. perf report/annotate recognise the
+   // special file name of that record and then read the jitdump file to get
+   // the meta data about the jitted code.
+   //
+   // PROT_EXEC is required so that perf record captures the mapping even
+   // without the -d option.
+   const auto MapLength = sys::Process::getPageSizeEstimate();
+   State.MarkerAddr = ::mmap(nullptr, MapLength, PROT_READ | PROT_EXEC,
+                             MAP_PRIVATE, State.DumpFd, 0);
+
+   if (State.MarkerAddr != MAP_FAILED)
+     return Error::success();
+
+   return make_error<llvm::StringError>("could not mmap JIT marker",
+                                        inconvertibleErrorCode());
+ }
+
+ /// Unmap the marker page recorded in State.MarkerAddr, if there is one.
+ ///
+ /// Nothing is unmapped when MarkerAddr is null or MAP_FAILED (the value that
+ /// OpenMarker stores when mmap fails), so the function may be called on a
+ /// PerfState whose marker was never mapped successfully. MarkerAddr is always
+ /// nullptr on return, which makes repeated calls harmless.
+ void CloseMarker(PerfState &State) {
+   // Neither "no marker" value may be handed to munmap.
+   if (!State.MarkerAddr || State.MarkerAddr == MAP_FAILED) {
+     State.MarkerAddr = nullptr;
+     return;
+   }
+
+   // Same length that OpenMarker used for the mapping.
+   munmap(State.MarkerAddr, sys::Process::getPageSizeEstimate());
+   State.MarkerAddr = nullptr;
+ }
+
+ /// Build the jitdump file header for this process.
+ ///
+ /// Magic, Version, TotalSize, Pid (from \p State) and Timestamp are filled in
+ /// directly; ElfMach is the e_machine field read from the ELF header of
+ /// /proc/self/exe. All remaining fields (Pad1, Flags) are zero.
+ ///
+ /// Returns a StringError when /proc/self/exe cannot be read or does not start
+ /// with the ELF signature.
+ static Expected<Header> FillMachine(PerfState &State) {
+   // Value-initialize: the whole struct is written to the dump file verbatim,
+   // so fields that are not assigned below must be zero, not indeterminate.
+   Header Hdr{};
+   Hdr.Magic = LLVM_PERF_JIT_MAGIC;
+   Hdr.Version = LLVM_PERF_JIT_VERSION;
+   Hdr.TotalSize = sizeof(Hdr);
+   Hdr.Pid = State.Pid;
+   Hdr.Timestamp = perf_get_timestamp();
+
+   // Leading part of an ELF header: 16 identification bytes followed by the
+   // 16-bit e_type and e_machine fields.
+   char Id[16];
+   struct {
+     uint16_t e_type;
+     uint16_t e_machine;
+   } Info;
+
+   size_t RequiredMemory = sizeof(Id) + sizeof(Info);
+
+   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+       MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0);
+
+   // This'll not guarantee that enough data was actually read from the
+   // underlying file. Instead the trailing part of the buffer would be
+   // zeroed. Given the ELF signature check below that seems ok though,
+   // it's unlikely that the file ends just after that, and the
+   // consequence would just be that perf wouldn't recognize the
+   // signature.
+   if (!MB)
+     return make_error<llvm::StringError>("could not open /proc/self/exe",
+                                          MB.getError());
+
+   memcpy(&Id, (*MB)->getBufferStart(), sizeof(Id));
+   memcpy(&Info, (*MB)->getBufferStart() + sizeof(Id), sizeof(Info));
+
+   // check ELF signature
+   if (Id[0] != 0x7f || Id[1] != 'E' || Id[2] != 'L' || Id[3] != 'F')
+     return make_error<llvm::StringError>("invalid ELF signature",
+                                          inconvertibleErrorCode());
+
+   Hdr.ElfMach = Info.e_machine;
+
+   return Hdr;
+ }
+
+ /// Choose and create the directory that will hold this process's jitdump file
+ /// and store its path in State.JitPath.
+ ///
+ /// The base directory is $JITDUMPDIR when set, otherwise the home directory,
+ /// otherwise ".". "/.debug/jit/" is created below it, and inside that a
+ /// directory is made with createUniqueDirectory from the model name
+ /// JIT_LANG "-jit-" followed by today's date as YYYYMMDD.
+ ///
+ /// Returns a StringError if either directory cannot be created.
+ static Error InitDebuggingDir(PerfState &State) {
+   // Shared formatter for the two directory-creation failures below.
+   auto DirError = [](const char *What, const auto &Dir, const auto &EC) {
+     std::string Msg;
+     raw_string_ostream OS(Msg);
+     OS << What << Dir << ": " << EC.message() << "\n";
+     return make_error<StringError>(std::move(Msg), inconvertibleErrorCode());
+   };
+
+   // search for location to dump data to
+   SmallString<64> Base;
+   if (const char *EnvDir = getenv("JITDUMPDIR"))
+     Base += EnvDir;
+   else if (!sys::path::home_directory(Base))
+     Base = ".";
+
+   // create debug directory
+   Base += "/.debug/jit/";
+   if (auto EC = sys::fs::create_directories(Base))
+     return DirError("could not create jit cache directory ", Base, EC);
+
+   // create unique directory for dump data related to this process
+   time_t Now;
+   time(&Now);
+   struct tm Local;
+   localtime_r(&Now, &Local);
+   char Stamp[sizeof("YYYYMMDD")];
+   strftime(Stamp, sizeof(Stamp), "%Y%m%d", &Local);
+   Base += JIT_LANG "-jit-";
+   Base += Stamp;
+
+   SmallString<128> UniqueDir;
+   if (auto EC = sys::fs::createUniqueDirectory(Base, UniqueDir))
+     return DirError("could not create unique jit cache directory ", UniqueDir,
+                     EC);
+
+   State.JitPath = std::string(UniqueDir.str());
+   return Error::success();
+ }
+
+ /// Set up jitdump output for this process and publish it through State.
+ ///
+ /// In order: verify that perf_get_timestamp() works, create the dump
+ /// directory, create "<JitPath>/jit-<pid>.dump", build the file header, mmap
+ /// the marker page, and write the header. State is assigned only after all of
+ /// that succeeded; on any failure an Error is returned and State is left
+ /// untouched.
+ static Error registerJITLoaderPerfStartImpl() {
+   PerfState Tentative;
+   Tentative.Pid = sys::Process::getProcessId();
+   // check if clock-source is supported
+   if (!perf_get_timestamp())
+     return make_error<StringError>("kernel does not support CLOCK_MONOTONIC",
+                                    inconvertibleErrorCode());
+
+   if (auto Err = InitDebuggingDir(Tentative))
+     return Err;
+
+   std::string Filename;
+   raw_string_ostream FilenameBuf(Filename);
+   FilenameBuf << Tentative.JitPath << "/jit-" << Tentative.Pid << ".dump";
+
+   // Need to open ourselves, because we need to hand the FD to OpenMarker() and
+   // raw_fd_ostream doesn't expose the FD. The call is spelled with its full
+   // qualification instead of relying on argument-dependent lookup.
+   if (auto EC = sys::fs::openFileForReadWrite(
+           FilenameBuf.str(), Tentative.DumpFd, sys::fs::CD_CreateNew,
+           sys::fs::OF_None)) {
+     std::string ErrStr;
+     raw_string_ostream ErrStream(ErrStr);
+     ErrStream << "could not open JIT dump file " << FilenameBuf.str() << ": "
+               << EC.message() << "\n";
+     return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
+   }
+
+   // The stream takes ownership of the descriptor (shouldClose = true).
+   Tentative.Dumpstream =
+       std::make_unique<raw_fd_ostream>(Tentative.DumpFd, true);
+
+   auto Hdr = FillMachine(Tentative);
+   if (!Hdr)
+     return Hdr.takeError();
+
+   // signal this process emits JIT information
+   if (auto Err = OpenMarker(Tentative))
+     return Err;
+
+   Tentative.Dumpstream->write(reinterpret_cast<const char *>(&Hdr.get()),
+                               sizeof(*Hdr));
+   // The stream is buffered: push the header out now so that a failed write
+   // is actually visible to has_error() below.
+   Tentative.Dumpstream->flush();
+
+   if (Tentative.Dumpstream->has_error()) {
+     // A raw_fd_ostream must not be destroyed with an error still pending, and
+     // the marker page mapped above would otherwise stay mapped.
+     Tentative.Dumpstream->clear_error();
+     CloseMarker(Tentative);
+     return make_error<StringError>("could not write JIT dump header",
+                                    inconvertibleErrorCode());
+   }
+
+   // Everything initialized, can do profiling now.
+   State = std::move(Tentative);
+   return Error::success();
+ }
+
+ /// Finish jitdump output: append a JIT_CODE_CLOSE record, unmap the marker
+ /// page if one is recorded, and reset State.
+ ///
+ /// Fails with a StringError when State has not been initialized.
+ static Error registerJITLoaderPerfEndImpl() {
+   if (!State)
+     return make_error<StringError>("PerfState not initialized",
+                                    inconvertibleErrorCode());
+
+   PerfState &Active = *State;
+
+   // The closing record consists of a record header only.
+   RecHeader CloseRec;
+   CloseRec.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE);
+   CloseRec.TotalSize = sizeof(CloseRec);
+   CloseRec.Timestamp = perf_get_timestamp();
+   Active.Dumpstream->write(reinterpret_cast<const char *>(&CloseRec),
+                            sizeof(CloseRec));
+
+   if (Active.MarkerAddr)
+     CloseMarker(Active);
+
+   State.reset();
+   return Error::success();
+ }
+
+ /// C entry point for record batches: passes the serialized call in
+ /// [Data, Data + Size) through WrapperFunction::handle to
+ /// registerJITLoaderPerfImpl and returns the released result.
+ extern "C" llvm::orc::shared::CWrapperFunctionResult
+ llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
+   using namespace orc::shared;
+   using Handler = WrapperFunction<SPSError(SPSPerfJITRecordBatch)>;
+   auto Result = Handler::handle(Data, Size, registerJITLoaderPerfImpl);
+   return Result.release();
+ }
+
+ /// C entry point that starts perf support: passes the serialized call in
+ /// [Data, Data + Size) through WrapperFunction::handle to
+ /// registerJITLoaderPerfStartImpl and returns the released result.
+ extern "C" llvm::orc::shared::CWrapperFunctionResult
+ llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
+   using namespace orc::shared;
+   using Handler = WrapperFunction<SPSError()>;
+   auto Result = Handler::handle(Data, Size, registerJITLoaderPerfStartImpl);
+   return Result.release();
+ }
+
+ /// C entry point that ends perf support: passes the serialized call in
+ /// [Data, Data + Size) through WrapperFunction::handle to
+ /// registerJITLoaderPerfEndImpl and returns the released result.
+ extern "C" llvm::orc::shared::CWrapperFunctionResult
+ llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
+   using namespace orc::shared;
+   using Handler = WrapperFunction<SPSError()>;
+   auto Result = Handler::handle(Data, Size, registerJITLoaderPerfEndImpl);
+   return Result.release();
+ }
+
+#else
+
+using namespace llvm;
+using namespace llvm::orc;
+
+static Error badOS() {
+ using namespace llvm;
+ return llvm::make_error<StringError>(
+ "unsupported OS (perf support is only available on linux!)",
+ inconvertibleErrorCode());
+}
+
+ /// Batch-taking counterpart of badOS(); the batch itself is ignored.
+ static Error badOSBatch(PerfJITRecordBatch &Batch) {
+   return badOS();
+ }
+
+ /// Unsupported-OS variant of the batch entry point: the call is still routed
+ /// through WrapperFunction::handle, but the handler is badOSBatch, so the
+ /// result always carries the "unsupported OS" error.
+ extern "C" llvm::orc::shared::CWrapperFunctionResult
+ llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
+   using namespace shared;
+   using Handler = WrapperFunction<SPSError(SPSPerfJITRecordBatch)>;
+   auto Result = Handler::handle(Data, Size, badOSBatch);
+   return Result.release();
+ }
+
+ /// Unsupported-OS variant of the start entry point: the handler is badOS, so
+ /// the result always carries the "unsupported OS" error.
+ extern "C" llvm::orc::shared::CWrapperFunctionResult
+ llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
+   using namespace shared;
+   using Handler = WrapperFunction<SPSError()>;
+   auto Result = Handler::handle(Data, Size, badOS);
+   return Result.release();
+ }
+
+ /// Unsupported-OS variant of the end entry point: the handler is badOS, so
+ /// the result always carries the "unsupported OS" error.
+ extern "C" llvm::orc::shared::CWrapperFunctionResult
+ llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
+   using namespace shared;
+   using Handler = WrapperFunction<SPSError()>;
+   auto Result = Handler::handle(Data, Size, badOS);
+   return Result.release();
+ }
+
+#endif
diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_perf.s b/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_perf.s
new file mode 100644
index 000000000000000..6eb612acd5d6e4a
--- /dev/null
+++ b/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_perf.s
@@ -0,0 +1,204 @@
+# REQUIRES: native && x86_64-linux
+
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: llvm-mc -triple=x86_64-unknown-linux -position-independent \
+# RUN: -filetype=obj -o %t/ELF_x86-64_perf.o %s
+# RUN: JITDUMPDIR="%t" llvm-jitlink -perf-support \
+# RUN: %t/ELF_x86-64_perf.o
+# RUN: test -f %t/.debug/jit/llvm-IR-jit-*/jit-*.dump
+
+# Test ELF perf support for code load records and unwind info
+
+ # Function `source` in its own section; returns 1 and carries CFI directives.
+ .text
+ .file "example.c"
+ .section .text.source,"ax",@progbits
+ .globl source # -- Begin function source
+ .p2align 4, 0x90
+ .type source,@function
+source: # @source
+.Lfunc_begin0:
+ .file 1 "/app" "example.c"
+ .loc 1 1 0 # example.c:1:0
+ .cfi_startproc
+# %bb.0:
+ .loc 1 2 5 prologue_end # example.c:2:5
+ movl $1, %eax
+ retq
+.Ltmp0:
+.Lfunc_end0:
+ .size source, .Lfunc_end0-source
+ .cfi_endproc
+ # -- End function
+ # Function `passthrough` in its own section; returns 1 and carries CFI
+ # directives.
+ .section .text.passthrough,"ax",@progbits
+ .globl passthrough # -- Begin function passthrough
+ .p2align 4, 0x90
+ .type passthrough,@function
+passthrough: # @passthrough
+.Lfunc_begin1:
+ .loc 1 5 0 # example.c:5:0
+ .cfi_startproc
+# %bb.0:
+ .loc 1 6 5 prologue_end # example.c:6:5
+ movl $1, %eax
+ retq
+.Ltmp1:
+.Lfunc_end1:
+ .size passthrough, .Lfunc_end1-passthrough
+ .cfi_endproc
+ # -- End function
+ # Entry point `main` in its own section; returns 0 and carries CFI directives.
+ .section .text.main,"ax",@progbits
+ .globl main # -- Begin function main
+ .p2align 4, 0x90
+ .type main,@function
+main: # @main
+.Lfunc_begin2:
+ .loc 1 9 0 # example.c:9:0
+ .cfi_startproc
+# %bb.0:
+ .loc 1 10 5 prologue_end # example.c:10:5
+ xorl %eax, %eax
+ retq
+.Ltmp2:
+.Lfunc_end2:
+ .size main, .Lfunc_end2-main
+ .cfi_endproc
+ # -- End function
+ .section .debug_abbrev,"",@progbits
+ .byte 1 # Abbreviation Code
+ .byte 17 # DW_TAG_compile_unit
+ .byte 1 # DW_CHILDREN_yes
+ .byte 37 # DW_AT_producer
+ .byte 14 # DW_FORM_strp
+ .byte 19 # DW_AT_language
+ .byte 5 # DW_FORM_data2
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 16 # DW_AT_stmt_list
+ .byte 23 # DW_FORM_sec_offset
+ .byte 27 # DW_AT_comp_dir
+ .byte 14 # DW_FORM_strp
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 85 # DW_AT_ranges
+ .byte 23 # DW_FORM_sec_offset
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 2 # Abbreviation Code
+ .byte 46 # DW_TAG_subprogram
+ .byte 0 # DW_CHILDREN_no
+ .byte 17 # DW_AT_low_pc
+ .byte 1 # DW_FORM_addr
+ .byte 18 # DW_AT_high_pc
+ .byte 6 # DW_FORM_data4
+ .byte 64 # DW_AT_frame_base
+ .byte 24 # DW_FORM_exprloc
+ .ascii "\227B" # DW_AT_GNU_all_call_sites
+ .byte 25 # DW_FORM_flag_present
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 58 # DW_AT_decl_file
+ .byte 11 # DW_FORM_data1
+ .byte 59 # DW_AT_decl_line
+ .byte 11 # DW_FORM_data1
+ .byte 73 # DW_AT_type
+ .byte 19 # DW_FORM_ref4
+ .byte 63 # DW_AT_external
+ .byte 25 # DW_FORM_flag_present
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 3 # Abbreviation Code
+ .byte 36 # DW_TAG_base_type
+ .byte 0 # DW_CHILDREN_no
+ .byte 3 # DW_AT_name
+ .byte 14 # DW_FORM_strp
+ .byte 62 # DW_AT_encoding
+ .byte 11 # DW_FORM_data1
+ .byte 11 # DW_AT_byte_size
+ .byte 11 # DW_FORM_data1
+ .byte 0 # EOM(1)
+ .byte 0 # EOM(2)
+ .byte 0 # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 4 # DWARF version number
+ .long .debug_abbrev # Offset Into Abbrev. Section
+ .byte 8 # Address Size (in bytes)
+ .byte 1 # Abbrev [1] 0xb:0x72 DW_TAG_compile_unit
+ .long .Linfo_string0 # DW_AT_producer
+ .short 12 # DW_AT_language
+ .long .Linfo_string1 # DW_AT_name
+ .long .Lline_table_start0 # DW_AT_stmt_list
+ .long .Linfo_string2 # DW_AT_comp_dir
+ .quad 0 # DW_AT_low_pc
+ .long .Ldebug_ranges0 # DW_AT_ranges
+ .byte 2 # Abbrev [2] 0x2a:0x19 DW_TAG_subprogram
+ .quad .Lfunc_begin0 # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 87
+ # DW_AT_GNU_all_call_sites
+ .long .Linfo_string3 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 1 # DW_AT_decl_line
+ .long 117 # DW_AT_type
+ # DW_AT_external
+ .byte 2 # Abbrev [2] 0x43:0x19 DW_TAG_subprogram
+ .quad .Lfunc_begin1 # DW_AT_low_pc
+ .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 87
+ # DW_AT_GNU_all_call_sites
+ .long .Linfo_string5 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 5 # DW_AT_decl_line
+ .long 117 # DW_AT_type
+ # DW_AT_external
+ .byte 2 # Abbrev [2] 0x5c:0x19 DW_TAG_subprogram
+ .quad .Lfunc_begin2 # DW_AT_low_pc
+ .long .Lfunc_end2-.Lfunc_begin2 # DW_AT_high_pc
+ .byte 1 # DW_AT_frame_base
+ .byte 87
+ # DW_AT_GNU_all_call_sites
+ .long .Linfo_string6 # DW_AT_name
+ .byte 1 # DW_AT_decl_file
+ .byte 9 # DW_AT_decl_line
+ .long 117 # DW_AT_type
+ # DW_AT_external
+ .byte 3 # Abbrev [3] 0x75:0x7 DW_TAG_base_type
+ .long .Linfo_string4 # DW_AT_name
+ .byte 5 # DW_AT_encoding
+ .byte 4 # DW_AT_byte_size
+ .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+ # One begin/end pair per function, closed by a pair of zero entries.
+ .section .debug_ranges,"",@progbits
+.Ldebug_ranges0:
+ .quad .Lfunc_begin0
+ .quad .Lfunc_end0
+ .quad .Lfunc_begin1
+ .quad .Lfunc_end1
+ .quad .Lfunc_begin2
+ .quad .Lfunc_end2
+ .quad 0
+ .quad 0
+ # String table referenced from .debug_info, followed by the file trailer and
+ # the label that marks the start of the line table.
+ .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+ .asciz "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 4ba6a9c9f65bbc8bd06e3652cb20fd4dfc846137)" # string offset=0
+.Linfo_string1:
+ .asciz "/app/example.c" # string offset=105
+.Linfo_string2:
+ .asciz "/app" # string offset=120
+.Linfo_string3:
+ .asciz "source" # string offset=125
+.Linfo_string4:
+ .asciz "int" # string offset=132
+.Linfo_string5:
+ .asciz "passthrough" # string offset=136
+.Linfo_string6:
+ .asciz "main" # string offset=148
+ .ident "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 4ba6a9c9f65bbc8bd06e3652cb20fd4dfc846137)"
+ .section ".note.GNU-stack","",@progbits
+ .addrsig
+ .section .debug_line,"",@progbits
+.Lline_table_start0:
diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp
index 64806b554ee7c0b..42ef651851a4ac5 100644
--- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp
+++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp
@@ -28,8 +28,10 @@
#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
#include "llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h"
+#include "llvm/ExecutionEngine/Orc/PerfSupportPlugin.h"
#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h"
#include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -140,6 +142,11 @@ static cl::opt<bool>
cl::desc("Enable debugger suppport (default = !-noexec)"),
cl::init(true), cl::Hidden, cl::cat(JITLinkCategory));
+static cl::opt<bool> PerfSupport("perf-support",
+ cl::desc("Enable perf profiling support"),
+ cl::init(false), cl::Hidden,
+ cl::cat(JITLinkCategory));
+
static cl::opt<bool>
NoProcessSymbols("no-process-syms",
cl::desc("Do not resolve to llvm-jitlink process symbols"),
@@ -243,10 +250,14 @@ static cl::opt<bool> UseSharedMemory(
static ExitOnError ExitOnErr;
static LLVM_ATTRIBUTE_USED void linkComponents() {
- errs() << (void *)&llvm_orc_registerEHFrameSectionWrapper
- << (void *)&llvm_orc_deregisterEHFrameSectionWrapper
- << (void *)&llvm_orc_registerJITLoaderGDBWrapper
- << (void *)&llvm_orc_registerJITLoaderGDBAllocAction;
+ errs() << "Linking in runtime functions\n"
+ << (void *)&llvm_orc_registerEHFrameSectionWrapper << '\n'
+ << (void *)&llvm_orc_deregisterEHFrameSectionWrapper << '\n'
+ << (void *)&llvm_orc_registerJITLoaderGDBWrapper << '\n'
+ << (void *)&llvm_orc_registerJITLoaderGDBAllocAction << '\n'
+ << (void *)&llvm_orc_registerJITLoaderPerfStart << '\n'
+ << (void *)&llvm_orc_registerJITLoaderPerfEnd << '\n'
+ << (void *)&llvm_orc_registerJITLoaderPerfImpl << '\n';
}
static bool UseTestResultOverride = false;
@@ -979,6 +990,10 @@ Session::Session(std::unique_ptr<ExecutorProcessControl> EPC, Error &Err)
ObjLayer.addPlugin(ExitOnErr(
GDBJITDebugInfoRegistrationPlugin::Create(this->ES, *MainJD, TT)));
+ if (PerfSupport && TT.isOSBinFormatELF())
+ ObjLayer.addPlugin(ExitOnErr(PerfSupportPlugin::Create(
+ this->ES.getExecutorProcessControl(), *MainJD, true)));
+
// Set up the platform.
if (TT.isOSBinFormatMachO() && !OrcRuntime.empty()) {
if (auto P =
More information about the llvm-commits
mailing list