[llvm-dev] Interest in integrating a linux perf JITEventListener?

Andres Freund via llvm-dev llvm-dev at lists.llvm.org
Sun Mar 25 23:29:11 PDT 2018


Hi,

On 2017-02-01 23:20:40 -0800, Andres Freund wrote:
> > Can you give a pointer to the patch so that I can assess the rough
> > complexity?  If it's simple enough, I'd be happy to help get it
> > reviewed and in.  If it's more complicated, I probably won't have the
> > time to assist.
> 
> Patch (and a prerequisite) attached. Took me a while to get it cleaned
> up to some degree - I'm a C programmer these days, and a lot of my C++
> knowledge has been replaced by other things...  It's still not super
> clean, but I think in a good enough state for you to estimate
> complexity.

After considerable further cleanup I've submitted this as: https://reviews.llvm.org/D44892
I'd greatly appreciate reviews!

> What do you think?  I've below included some example output to show
> what's going on.
> 
> A random example (source c file also attached):
> # generate some IR, with debug info
> clang -ggdb -S -c -emit-llvm expensive_loop.c -o /tmp/expensive_loop.ll
> # record profile ('-k 1' is the clocksource, '-g' records hierarchical call graphs)
> perf record -g -k 1 lli -jit-kind=mcjit /tmp/expensive_loop.ll 1
> # enrich profile with JIT information emitted due to patch
> perf inject --jit -i perf.data -o perf.jit.data
> # and show information
> perf report -i perf.jit.data
> 
> Example output:
> Samples: 3K of event 'cycles:ppp', Event count (approx.): 3127026392
>   Overhead  Command  Shared Object                      Symbol
> -   93.41%  lli      jitted-27248-2.so                  [.] stupid_isprime
>      stupid_isprime
>      main
>      llvm::MCJIT::runFunction
>      llvm::ExecutionEngine::runFunctionAsMain
>      main
>      __libc_start_main
>      0xec26258d4c544155
> +    0.55%  lli      ld-2.24.so                         [.] do_lookup_x
> +    0.22%  lli      ld-2.24.so                         [.] _dl_lookup_symbol_x
> +    0.17%  lli      [kernel.vmlinux]                   [k] unmap_page_range
> +    0.16%  lli      ld-2.24.so                         [.] _dl_fixup
> 
> 
> Instruction level view:
> 
>        │      Disassembly of section .text:
>>        │      0000000000000040 <stupid_isprime>:
>        │      stupid_isprime():
>        │      #include <stdint.h>
>        │      #include <stdbool.h>
>>        │      bool stupid_isprime(uint64_t num)
>        │      {
>        │        push   %rbp
>        │        mov    %rsp,%rbp
>        │        mov    %rdi,-0x10(%rbp)
>        │              if (num == 2)
>        │        cmp    $0x2,%rdi
>        │      ↓ jne    14
>        │1  e:┌─→movb   $0x1,-0x1(%rbp)
>        │     │↓ jmp    55
>        │     │                return true;
>        │     │        if (num < 1 || num % 2 == 0)
>        │1 14:│  cmpq   $0x0,-0x10(%rbp)
>        │     │↓ je     51
>        │     │  testb  $0x1,-0x10(%rbp)
>        │     │↓ je     51
>        │     │                return false;
>        │     │        for(uint64_t i = 3; i < num / 2; i+= 2) {
>        │     │  movq   $0x3,-0x18(%rbp)
>        │     │↓ jmp    35
>        │     │  nop
>        │1 30:│  addq   $0x2,-0x18(%rbp)
>   4.03 │1 35:│  mov    -0x10(%rbp),%rax
>   0.06 │     │  shr    %rax
>        │     │  cmp    %rax,-0x18(%rbp)
>        │     └──jae    e
>        │                      if (num % i == 0)
>   3.74 │        mov    -0x10(%rbp),%rax
>   0.09 │        xor    %edx,%edx
>  91.82 │        divq   -0x18(%rbp)
>        │        test   %rdx,%rdx
>   0.23 │      ↑ jne    30
>   0.03 │2 51:   movb   $0x0,-0x1(%rbp)
>        │1 55:   mov    -0x1(%rbp),%al
>        │        pop    %rbp
>        │      ← retq
> 
> (the missing colors make it harder to see what's going on)

> From d4ad23177254577dde59d483f001936c693d8dea Mon Sep 17 00:00:00 2001
> From: Andres Freund <andres at anarazel.de>
> Date: Wed, 1 Feb 2017 21:18:54 -0800
> Subject: [PATCH 1/2] [MCJIT] Call JIT notifiers only after code sections are
>  ready.
> 
> Previously JIT notifiers were called before relocations were
> performed (leading to ominous function calls to "0"), and before
> memory was marked executable (confusing some profilers).
> 
> Move notifications to finalizeLoadedModules().
> ---
>  lib/ExecutionEngine/MCJIT/MCJIT.cpp | 16 ++++++++++++++--
>  lib/ExecutionEngine/MCJIT/MCJIT.h   |  2 ++
>  2 files changed, 16 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
> index ff8749fbfed..ebe475d9f0b 100644
> --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
> +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
> @@ -222,8 +222,10 @@ void MCJIT::generateCodeForModule(Module *M) {
>    if (Dyld.hasError())
>      report_fatal_error(Dyld.getErrorString());
>  
> -  NotifyObjectEmitted(*LoadedObject.get(), *L);
> -
> +  // Can't call notifiers yet as relocations have not yet been performed, and
> +  // memory hasn't been marked executable.
> +  PendingLoadedObjects.push_back(LoadedObject->get());
> +  PendingLoadedObjectInfos.push_back(std::move(L));
>    Buffers.push_back(std::move(ObjectToLoad));
>    LoadedObjects.push_back(std::move(*LoadedObject));
>  
> @@ -243,6 +245,16 @@ void MCJIT::finalizeLoadedModules() {
>  
>    // Set page permissions.
>    MemMgr->finalizeMemory();
> +
> +  // Notify listeners about loaded objects now that memory is marked executable
> +  // and relocations have been performed.
> +  for (size_t i = 0; i < PendingLoadedObjects.size(); i++) {
> +    auto &Obj = PendingLoadedObjects[i];
> +    auto &Info = PendingLoadedObjectInfos[i];
> +    NotifyObjectEmitted(*Obj, *Info);
> +  }
> +  PendingLoadedObjects.clear();
> +  PendingLoadedObjectInfos.clear();
>  }
>  
>  // FIXME: Rename this.
> diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
> index daf578f5daa..418578fc7a3 100644
> --- a/lib/ExecutionEngine/MCJIT/MCJIT.h
> +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
> @@ -189,6 +189,8 @@ class MCJIT : public ExecutionEngine {
>    SmallVector<std::unique_ptr<MemoryBuffer>, 2> Buffers;
>  
>    SmallVector<std::unique_ptr<object::ObjectFile>, 2> LoadedObjects;
> +  SmallVector<object::ObjectFile*, 2> PendingLoadedObjects;
> +  SmallVector<std::unique_ptr<RuntimeDyld::LoadedObjectInfo>, 2> PendingLoadedObjectInfos;
>  
>    // An optional ObjectCache to be notified of compiled objects and used to
>    // perform lookup of pre-compiled code to avoid re-compilation.
> -- 
> 2.11.0.22.g8d7a455.dirty
> 

> From debc641fbc5980461f8d36e534d5a6de70e0a293 Mon Sep 17 00:00:00 2001
> From: Andres Freund <andres at anarazel.de>
> Date: Wed, 1 Feb 2017 23:10:45 -0800
> Subject: [PATCH 2/2] Add PerfJITEventListener for perf profiling support.
> 
> ---
>  CMakeLists.txt                                     |  13 +
>  include/llvm/Config/config.h.cmake                 |   3 +
>  include/llvm/Config/llvm-config.h.cmake            |   3 +
>  include/llvm/ExecutionEngine/JITEventListener.h    |   9 +
>  lib/ExecutionEngine/CMakeLists.txt                 |   4 +
>  lib/ExecutionEngine/LLVMBuild.txt                  |   2 +-
>  lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt   |   5 +
>  .../{ => PerfJITEvents}/LLVMBuild.txt              |  13 +-
>  .../PerfJITEvents/PerfJITEventListener.cpp         | 530 +++++++++++++++++++++
>  tools/lli/CMakeLists.txt                           |   9 +
>  tools/lli/lli.cpp                                  |   2 +
>  11 files changed, 584 insertions(+), 9 deletions(-)
>  create mode 100644 lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
>  copy lib/ExecutionEngine/{ => PerfJITEvents}/LLVMBuild.txt (65%)
>  create mode 100644 lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
> 
> diff --git a/CMakeLists.txt b/CMakeLists.txt
> index 5cb24ea7d78..6ce14799725 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -445,6 +445,16 @@ if( LLVM_USE_OPROFILE )
>    endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
>  endif( LLVM_USE_OPROFILE )
>  
> +option(LLVM_USE_PERF
> +  "Use perf JIT interface to inform perf about JIT code" OFF)
> +
> +# If enabled, verify we are on a platform that supports perf.
> +if( LLVM_USE_PERF )
> +  if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
> +    message(FATAL_ERROR "perf support is available on Linux only.")
> +  endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
> +endif( LLVM_USE_PERF )
> +
>  set(LLVM_USE_SANITIZER "" CACHE STRING
>    "Define the sanitizer used to build binaries and tests.")
>  
> @@ -638,6 +648,9 @@ endif (LLVM_USE_INTEL_JITEVENTS)
>  if (LLVM_USE_OPROFILE)
>    set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} OProfileJIT)
>  endif (LLVM_USE_OPROFILE)
> +if (LLVM_USE_PERF)
> +  set(LLVMOPTIONALCOMPONENTS PerfJITEvents)
> +endif (LLVM_USE_PERF)
>  
>  message(STATUS "Constructing LLVMBuild project information")
>  execute_process(
> diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
> index ff41d75373c..cdc14aebe0f 100644
> --- a/include/llvm/Config/config.h.cmake
> +++ b/include/llvm/Config/config.h.cmake
> @@ -384,6 +384,9 @@
>  /* Define if we have the oprofile JIT-support library */
>  #cmakedefine01 LLVM_USE_OPROFILE
>  
> +/* Define if we have the perf JIT-support library */
> +#cmakedefine01 LLVM_USE_PERF
> +
>  /* LLVM version information */
>  #cmakedefine LLVM_VERSION_INFO "${LLVM_VERSION_INFO}"
>  
> diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
> index 35999705761..09c807cba8c 100644
> --- a/include/llvm/Config/llvm-config.h.cmake
> +++ b/include/llvm/Config/llvm-config.h.cmake
> @@ -65,6 +65,9 @@
>  /* Define if we have the oprofile JIT-support library */
>  #cmakedefine01 LLVM_USE_OPROFILE
>  
> +/* Define if we have the perf JIT-support library */
> +#cmakedefine01 LLVM_USE_PERF
> +
>  /* Major version of the LLVM API */
>  #define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
>  
> diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
> index 94ec4e36a19..b6916bf55a1 100644
> --- a/include/llvm/ExecutionEngine/JITEventListener.h
> +++ b/include/llvm/ExecutionEngine/JITEventListener.h
> @@ -113,6 +113,15 @@ public:
>    }
>  #endif // USE_OPROFILE
>  
> +#ifdef LLVM_USE_PERF
> +  static JITEventListener *createPerfJITEventListener();
> +#else
> +  static JITEventListener *createPerfJITEventListener()
> +  {
> +    return nullptr;
> +  }
> +#endif // USE_PERF
> +
>  private:
>    virtual void anchor();
>  };
> diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
> index 2d9337bbefd..6092b4d1100 100644
> --- a/lib/ExecutionEngine/CMakeLists.txt
> +++ b/lib/ExecutionEngine/CMakeLists.txt
> @@ -26,3 +26,7 @@ endif( LLVM_USE_OPROFILE )
>  if( LLVM_USE_INTEL_JITEVENTS )
>    add_subdirectory(IntelJITEvents)
>  endif( LLVM_USE_INTEL_JITEVENTS )
> +
> +if( LLVM_USE_PERF )
> +  add_subdirectory(PerfJITEvents)
> +endif( LLVM_USE_PERF )
> diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
> index 9d29a41f504..b6e1bda6a51 100644
> --- a/lib/ExecutionEngine/LLVMBuild.txt
> +++ b/lib/ExecutionEngine/LLVMBuild.txt
> @@ -16,7 +16,7 @@
>  ;===------------------------------------------------------------------------===;
>  
>  [common]
> -subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc
> +subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc PerfJITEvents
>  
>  [component_0]
>  type = Library
> diff --git a/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
> new file mode 100644
> index 00000000000..136cc429d02
> --- /dev/null
> +++ b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
> @@ -0,0 +1,5 @@
> +add_llvm_library(LLVMPerfJITEvents
> +  PerfJITEventListener.cpp
> +  )
> +
> +add_dependencies(LLVMPerfJITEvents LLVMCodeGen)
> diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
> similarity index 65%
> copy from lib/ExecutionEngine/LLVMBuild.txt
> +++ b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt
> @@ -1,4 +1,4 @@
> -;===- ./lib/ExecutionEngine/LLVMBuild.txt ----------------------*- Conf -*--===;
> +;===- ./lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt ----------------*- Conf -*--===;
>  ;
>  ;                     The LLVM Compiler Infrastructure
>  ;
> @@ -15,11 +15,8 @@
>  ;
>  ;===------------------------------------------------------------------------===;
>  
> -[common]
> -subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc
> -
>  [component_0]
> -type = Library
> -name = ExecutionEngine
> -parent = Libraries
> -required_libraries = Core MC Object RuntimeDyld Support Target
> +type = OptionalLibrary
> +name = PerfJITEvents
> +parent = ExecutionEngine
> +required_libraries = CodeGen Core DebugInfoDWARF Support Object ExecutionEngine
> diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
> new file mode 100644
> index 00000000000..301a8bc70e1
> --- /dev/null
> +++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
> @@ -0,0 +1,530 @@
> +//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines a JITEventListener object that tells perf about JITted
> +// functions, including source line information.
> +//
> +// Documentation for perf jit integration is available at:
> +// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt
> +// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include "llvm/Config/config.h"
> +
> +#include <unistd.h> // for getpid(), sysconf()
> +#include <syscall.h> // for gettid() */
> +#include <time.h> // clock_gettime(), time(), localtime_r() */
> +#include <sys/mman.h> // mmap() */
> +#include <sys/types.h> // getpid(), open()
> +#include <sys/stat.h> // open()
> +#include <fcntl.h> // open()
> +
> +#include "llvm/ExecutionEngine/JITEventListener.h"
> +
> +#include "llvm/ADT/Twine.h"
> +#include "llvm/DebugInfo/DWARF/DWARFContext.h"
> +#include "llvm/Object/ObjectFile.h"
> +#include "llvm/Object/SymbolSize.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Support/Errno.h"
> +#include "llvm/Support/FileSystem.h"
> +#include "llvm/Support/Mutex.h"
> +#include "llvm/Support/MutexGuard.h"
> +#include "llvm/Support/raw_ostream.h"
> +
> +using namespace llvm;
> +using namespace llvm::object;
> +typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
> +
> +namespace {
> +
> +// language identifier (XXX: should we generate something better from debug info?)
> +#define JIT_LANG "llvm-IR"
> +#define LLVM_PERF_JIT_MAGIC ((uint32_t) 'J' << 24 | (uint32_t) 'i' << 16 | (uint32_t) 'T' << 8 | (uint32_t) 'D')
> +#define LLVM_PERF_JIT_VERSION 2
> +
> +/* bit 0: set if the jitdump file is using an architecture-specific timestamp clock source */
> +#define JITDUMP_FLAGS_ARCH_TIMESTAMP  (1ULL << 0)
> +
> +struct LLVMPerfJitHeader;
> +
> +class PerfJITEventListener : public JITEventListener {
> +public:
> +  PerfJITEventListener();
> +  ~PerfJITEventListener() {
> +    if (MarkerAddr)
> +      CloseMarker();
> +  }
> +
> +  void NotifyObjectEmitted(const ObjectFile &Obj,
> +                           const RuntimeDyld::LoadedObjectInfo &L) override;
> +
> +  void NotifyFreeingObject(const ObjectFile &Obj) override;
> +
> +private:
> +
> +  bool InitDebuggingDir();
> +  bool OpenMarker();
> +  void CloseMarker();
> +  bool FillMachine(LLVMPerfJitHeader &hdr);
> +
> +  void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr, uint64_t CodeSize);
> +  void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines);
> +
> +  // output data stream
> +  std::unique_ptr<raw_fd_ostream> Dumpstream;
> +
> +  // fd of the output data stream, lifetime managed via Dumpstream
> +  int Fd;
> +
> +  // prevent concurrent dumps from messing up the output file
> +  sys::Mutex Mutex;
> +
> +  // cache lookups
> +  pid_t Pid;
> +
> +  // base directory for output data
> +  std::string JitPath;
> +
> +  // perf mmap marker
> +  void *MarkerAddr = NULL;
> +
> +  // perf support ready
> +  bool SuccessfullyInitialized = false;
> +};
> +
> +// The following are POD struct definitions from the perf jit specification
> +
> +enum LLVMPerfJitRecordType {
> +  JIT_CODE_LOAD = 0,
> +  JIT_CODE_MOVE = 1,
> +  JIT_CODE_DEBUG_INFO = 2,
> +  JIT_CODE_CLOSE = 3,
> +  JIT_CODE_UNWINDING_INFO = 4,
> +
> +  JIT_CODE_MAX,
> +};
> +
> +struct LLVMPerfJitHeader {
> +  uint32_t Magic; /* characters "JiTD" */
> +  uint32_t Version; /* header version */
> +  uint32_t TotalSize; /* total size of header */
> +  uint32_t ElfMach; /* elf mach target */
> +  uint32_t Pad1; /* reserved */
> +  uint32_t Pid;
> +  uint64_t Timestamp; /* timestamp */
> +  uint64_t Flags; /* flags */
> +};
> +
> +/* record prefix (mandatory in each record) */
> +struct LLVMPerfJitRecordPrefix {
> +  uint32_t Id; /* record type identifier */
> +  uint32_t TotalSize;
> +  uint64_t Timestamp;
> +};
> +
> +struct LLVMPerfJitRecordCodeLoad {
> +  LLVMPerfJitRecordPrefix Prefix;
> +
> +  uint32_t Pid;
> +  uint32_t Tid;
> +  uint64_t Vma;
> +  uint64_t CodeAddr;
> +  uint64_t CodeSize;
> +  uint64_t CodeIndex;
> +};
> +
> +struct LLVMPerfJitRecordClose {
> +  LLVMPerfJitRecordPrefix Prefix;
> +};
> +
> +struct LLVMPerfJitRecordMoveCode {
> +  LLVMPerfJitRecordPrefix Prefix;
> +
> +  uint32_t Pid;
> +  uint32_t Tid;
> +  uint64_t Vma;
> +  uint64_t OldCodeAddr;
> +  uint64_t NewCodeAddr;
> +  uint64_t CodeSize;
> +  uint64_t CodeIndex;
> +};
> +
> +struct LLVMPerfJitDebugEntry {
> +  uint64_t Addr;
> +  int Lineno; /* source line number starting at 1 */
> +  int Discrim; /* column discriminator, 0 is default */
> +  char Name[]; /* null terminated filename, \xff\0 if same as previous entry */
> +};
> +
> +struct LLVMPerfJitRecordDebugInfo {
> +  LLVMPerfJitRecordPrefix Prefix;
> +
> +  uint64_t CodeAddr;
> +  uint64_t NrEntry;
> +  LLVMPerfJitDebugEntry Entries[];
> +};
> +
> +struct LLVMPerfJitRecordUnwindInfo {
> +  LLVMPerfJitRecordPrefix prefix;
> +
> +  uint64_t UnwindingSize;
> +  uint64_t EhFrameHdrSize;
> +  uint64_t MappedSize;
> +  const char UnwindingData[];
> +};
> +
> +// not available otherwise
> +static inline pid_t gettid(void) {
> +  return (pid_t)syscall(__NR_gettid);
> +}
> +
> +static inline uint64_t
> +timespec_to_ns(const struct timespec *ts) {
> +  const uint64_t NanoSecPerSec = 1000000000;
> +  return ((uint64_t) ts->tv_sec * NanoSecPerSec) + ts->tv_nsec;
> +}
> +
> +static inline uint64_t
> +perf_get_timestamp(void) {
> +  struct timespec ts;
> +  int ret;
> +
> +  ret = clock_gettime(CLOCK_MONOTONIC, &ts);
> +  if (ret)
> +    return 0;
> +
> +  return timespec_to_ns(&ts);
> +}
> +
> +
> +PerfJITEventListener::PerfJITEventListener()
> +    : Pid(getpid()) {
> +
> +  LLVMPerfJitHeader Header = {0};
> +  std::string Filename;
> +  raw_string_ostream FilenameBuf(Filename);
> +
> +  // check if clock-source is supported
> +  if (!perf_get_timestamp()) {
> +    errs() << "kernel does not support CLOCK_MONOTONIC("<<CLOCK_MONOTONIC<<")\n";
> +    return;
> +  }
> +
> +  memset(&Header, 0, sizeof(Header));
> +
> +  if (!InitDebuggingDir()) {
> +    errs() << "could not initialize debugging directory\n";
> +    return;
> +  }
> +
> +  FilenameBuf << JitPath << "/jit-"<<Pid<<".dump";
> +
> +  Fd = ::open(FilenameBuf.str().c_str(), O_CREAT|O_TRUNC|O_RDWR, 0666);
> +  if (Fd == -1) {
> +    errs() << "could not open JIT dump file "<<FilenameBuf.str()<<"\n";
> +    return;
> +  }
> +
> +  std::error_code EC;
> +  Dumpstream = make_unique<raw_fd_ostream>(Fd, true);
> +  assert(!EC);
> +
> +  if (!OpenMarker()) {
> +    return;
> +  }
> +
> +  if (!FillMachine(Header)) {
> +    return;
> +  }
> +
> +  Header.Magic = LLVM_PERF_JIT_MAGIC;
> +  Header.Version = LLVM_PERF_JIT_VERSION;
> +  Header.TotalSize = sizeof(Header);
> +  Header.Pid = Pid;
> +  Header.Timestamp = perf_get_timestamp();
> +
> +  Dumpstream->write((char *) &Header, sizeof(Header));
> +
> +  // Everything initialized, can do profiling now.
> +  if (!Dumpstream->has_error())
> +    SuccessfullyInitialized = true;
> +}
> +
> +void PerfJITEventListener::NotifyObjectEmitted(
> +    const ObjectFile &Obj,
> +    const RuntimeDyld::LoadedObjectInfo &L) {
> +
> +  if (!SuccessfullyInitialized)
> +    return;
> +
> +  OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj);
> +  const ObjectFile &DebugObj = *DebugObjOwner.getBinary();
> +
> +  // Build a DWARF context for the debug object, used for line info lookups
> +  DWARFContextInMemory Context(DebugObj);
> +
> +  // Use symbol info to iterate functions in the object.
> +  for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) {
> +    SymbolRef Sym = P.first;
> +    std::vector<LLVMPerfJitDebugEntry> LineInfo;
> +    std::string SourceFileName;
> +
> +    Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType();
> +    if (!SymTypeOrErr) {
> +      // TODO: Actually report errors helpfully.
> +      consumeError(SymTypeOrErr.takeError());
> +      continue;
> +    }
> +    SymbolRef::Type SymType = *SymTypeOrErr;
> +    if (SymType != SymbolRef::ST_Function)
> +      continue;
> +
> +    Expected<StringRef> Name = Sym.getName();
> +    if (!Name) {
> +      // TODO: Actually report errors helpfully.
> +      consumeError(Name.takeError());
> +      continue;
> +    }
> +
> +    Expected<uint64_t> AddrOrErr = Sym.getAddress();
> +    if (!AddrOrErr) {
> +      // TODO: Actually report errors helpfully.
> +      consumeError(AddrOrErr.takeError());
> +      continue;
> +    }
> +    uint64_t Addr = *AddrOrErr;
> +    uint64_t Size = P.second;
> +
> +    // According to the spec, debugging info has to be emitted before the
> +    // corresponding code load record.
> +    DILineInfoTable Lines = Context.getLineInfoForAddressRange(
> +        Addr, Size, FileLineInfoKind::AbsoluteFilePath);
> +    NotifyDebug(Addr, Lines);
> +
> +    NotifyCode(Name, Addr, Size);
> +  }
> +
> +  Dumpstream->flush();
> +}
> +
> +void PerfJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) {
> +  /* perf currently doesn't have an interface for unloading */
> +}
> +
> +bool PerfJITEventListener::InitDebuggingDir() {
> +  const char *BaseDir;
> +  llvm::SmallString<128> TestDir;
> +  time_t Time;
> +  struct tm LocalTime;
> +  char TimeBuffer[sizeof("YYMMDD")];
> +
> +  time(&Time);
> +  localtime_r(&Time, &LocalTime);
> +
> +  /* perf specific location */
> +  BaseDir = getenv("JITDUMPDIR");
> +  if (!BaseDir)
> +    BaseDir = getenv("HOME");
> +  if (!BaseDir)
> +    BaseDir = ".";
> +
> +  strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
> +
> +  std::string DebugDir(BaseDir);
> +  DebugDir += "/.debug/jit/";
> +
> +  if (sys::fs::create_directories(DebugDir)) {
> +    errs() << "could not create jit cache directory "<<DebugDir<<"\n";
> +    return false;
> +  }
> +
> +  SmallString<128> UniqueDebugDir;
> +
> +  if (sys::fs::createUniqueDirectory(Twine(DebugDir) + JIT_LANG"-jit-" + TimeBuffer,
> +                                     UniqueDebugDir)) {
> +    errs() << "could not create unique jit cache directory "<<DebugDir<<"\n";
> +    return false;
> +  }
> +
> +  JitPath = UniqueDebugDir.str();
> +
> +  return true;
> +}
> +
> +bool PerfJITEventListener::OpenMarker() {
> +  long pgsz;
> +
> +  pgsz = ::sysconf(_SC_PAGESIZE);
> +  if (pgsz == -1)
> +    return false;
> +
> +  /*
> +   * We mmap the jitdump to create an MMAP RECORD in perf.data file.  The mmap
> +   * is captured either live (perf record running when we mmap) or in deferred
> +   * mode, via /proc/PID/maps the MMAP record is used as a marker of a jitdump
> +   * file for more meta data info about the jitted code. Perf report/annotate
> +   * detect this special filename and process the jitdump file.
> +   *
> +   * Mapping must be PROT_EXEC to ensure it is captured by perf record
> +   * even when not using -d option.
> +   */
> +  MarkerAddr = ::mmap(NULL, pgsz, PROT_READ|PROT_EXEC, MAP_PRIVATE, Fd, 0);
> +
> +  if (MarkerAddr == MAP_FAILED) {
> +    errs() << "could not mmap JIT marker\n";
> +    return false;
> +  }
> +  return true;
> +}
> +
> +bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) {
> +  ssize_t sret;
> +  char id[16];
> +  int fd;
> +  struct {
> +    uint16_t e_type;
> +    uint16_t e_machine;
> +  } info;
> +
> +  fd = ::open("/proc/self/exe", O_RDONLY);
> +  if (fd == -1) {
> +    errs() << "could not open /proc/self/exe\n";
> +    return false;
> +  }
> +
> +  sret = ::read(fd, id, sizeof(id));
> +  if (sret != sizeof(id)) {
> +    errs() << "could not read elf signature from /proc/self/exe\n";
> +    goto error;
> +  }
> +
> +  /* check ELF signature */
> +  if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') {
> +    errs() << "invalid elf signature\n";
> +    goto error;
> +  }
> +
> +  sret = ::read(fd, &info, sizeof(info));
> +  if (sret != sizeof(info)) {
> +    errs() << "could not read machine identification\n";
> +    goto error;
> +  }
> +
> +  hdr.ElfMach = info.e_machine;
> + error:
> +  close(fd);
> +  return true;
> +}
> +
> +void PerfJITEventListener::CloseMarker() {
> +  long pgsz;
> +
> +  if (!MarkerAddr)
> +    return;
> +
> +  pgsz = ::sysconf(_SC_PAGESIZE);
> +  if (pgsz == -1)
> +    return;
> +
> +  munmap(MarkerAddr, pgsz);
> +  MarkerAddr = nullptr;
> +}
> +
> +void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr, uint64_t CodeSize) {
> +  static int code_generation = 1;
> +  LLVMPerfJitRecordCodeLoad rec;
> +
> +  assert(SuccessfullyInitialized);
> +
> +  // 0 length functions can't have samples.
> +  if (CodeSize == 0)
> +    return;
> +
> +  rec.Prefix.Id = JIT_CODE_LOAD;
> +  rec.Prefix.TotalSize =
> +    sizeof(rec) + // debug record itself
> +    Symbol->size() + 1 + // symbol name
> +    CodeSize; // and code
> +  rec.Prefix.Timestamp = perf_get_timestamp();
> +
> +  rec.CodeSize = CodeSize;
> +  rec.Vma = 0;
> +  rec.CodeAddr = CodeAddr;
> +  rec.Pid = Pid;
> +  rec.Tid = gettid();
> +
> +  // get code index inside lock to avoid race condition
> +  MutexGuard Guard(Mutex);
> +
> +  rec.CodeIndex = code_generation++;
> +
> +  Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
> +  Dumpstream->write(Symbol->data(), Symbol->size() + 1);
> +  Dumpstream->write(reinterpret_cast<const char *>(CodeAddr), CodeSize);
> +}
> +
> +void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines) {
> +  LLVMPerfJitRecordDebugInfo rec;
> +
> +  assert(SuccessfullyInitialized);
> +
> +  // Didn't get useful debug info.
> +  if (Lines.empty())
> +    return;
> +
> +  rec.Prefix.Id = JIT_CODE_DEBUG_INFO;
> +  rec.Prefix.TotalSize = sizeof(rec); // will be increased further
> +  rec.Prefix.Timestamp = perf_get_timestamp();
> +  rec.CodeAddr = CodeAddr;
> +  rec.NrEntry = Lines.size();
> +
> +  /* compute total size of record (variable due to filenames) */
> +  DILineInfoTable::iterator Begin = Lines.begin();
> +  DILineInfoTable::iterator End = Lines.end();
> +  for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
> +    DILineInfo &line = It->second;
> +    rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry);
> +    rec.Prefix.TotalSize += line.FileName.size() + 1;
> +  }
> +
> +  Dumpstream->write(reinterpret_cast<const char *>(&rec), sizeof(rec));
> +
> +  // The debug_entry describes the source line information. It is defined as follows in order:
> +  // * uint64_t code_addr: address of function for which the debug information is generated
> +  // * uint32_t line     : source file line number (starting at 1)
> +  // * uint32_t discrim  : column discriminator, 0 is default
> +  // * char name[n]      : source file name in ASCII, including null termination
> +
> +  MutexGuard Guard(Mutex);
> +
> +  for (DILineInfoTable::iterator It = Begin; It != End; ++It) {
> +    LLVMPerfJitDebugEntry LineInfo;
> +    DILineInfo &Line = It->second;
> +
> +    LineInfo.Addr = It->first;
> +    // For reasons unknown to me either llvm offsets or perf's use of them is
> +    // offset by 0x40. Inquiring.
> +    LineInfo.Addr += 0x40;
> +    LineInfo.Lineno = Line.Line;
> +    LineInfo.Discrim = Line.Discriminator;
> +
> +    Dumpstream->write(reinterpret_cast<const char *>(&LineInfo), sizeof(LineInfo));
> +    Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1);
> +  }
> +}
> +
> +} // end anonymous namespace
> +
> +namespace llvm {
> +JITEventListener *JITEventListener::createPerfJITEventListener() {
> +  return new PerfJITEventListener();
> +}
> +} // end llvm namespace
> diff --git a/tools/lli/CMakeLists.txt b/tools/lli/CMakeLists.txt
> index f02e19313b7..5f235b6f6f3 100644
> --- a/tools/lli/CMakeLists.txt
> +++ b/tools/lli/CMakeLists.txt
> @@ -36,6 +36,15 @@ if( LLVM_USE_INTEL_JITEVENTS )
>      )
>  endif( LLVM_USE_INTEL_JITEVENTS )
>  
> +if( LLVM_USE_PERF )
> +  set(LLVM_LINK_COMPONENTS
> +    ${LLVM_LINK_COMPONENTS}
> +    DebugInfoDWARF
> +    PerfJITEvents
> +    Object
> +    )
> +endif( LLVM_USE_PERF )
> +
>  add_llvm_tool(lli
>    lli.cpp
>    OrcLazyJIT.cpp
> diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
> index 0823ff469de..9e488a293e2 100644
> --- a/tools/lli/lli.cpp
> +++ b/tools/lli/lli.cpp
> @@ -546,6 +546,8 @@ int main(int argc, char **argv, char * const *envp) {
>                  JITEventListener::createOProfileJITEventListener());
>    EE->RegisterJITEventListener(
>                  JITEventListener::createIntelJITEventListener());
> +  EE->RegisterJITEventListener(
> +                JITEventListener::createPerfJITEventListener());
>  
>    if (!NoLazyCompilation && RemoteMCJIT) {
>      errs() << "warning: remote mcjit does not support lazy compilation\n";
> -- 
> 2.11.0.22.g8d7a455.dirty
> 

> #include <stdint.h>
> #include <stdbool.h>
> 
> bool stupid_isprime(uint64_t num)
> {
> 	if (num == 2)
> 		return true;
> 	if (num < 1 || num % 2 == 0)
> 		return false;
> 	for(uint64_t i = 3; i < num / 2; i+= 2) {
> 		if (num % i == 0)
> 			return false;
> 	}
> 	return true;
> }
> 
> int main(int argc, char **argv)
> {
> 	int numprimes = 0;
> 
> 	for (uint64_t num = argc; num < 100000; num++)
> 	{
> 		if (stupid_isprime(num))
> 			numprimes++;
> 	}
> 
> 	return numprimes;
> }

Greetings,

Andres Freund


More information about the llvm-dev mailing list