[llvm] r268112 - [Orc] Add ORC lazy-compilation support for AArch64.

Lang Hames via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 29 14:32:00 PDT 2016


Author: lhames
Date: Fri Apr 29 16:32:00 2016
New Revision: 268112

URL: http://llvm.org/viewvc/llvm-project?rev=268112&view=rev
Log:
[Orc] Add ORC lazy-compilation support for AArch64.

The ORC compile callbacks and indirect stubs APIs will now work for AArch64,
allowing functions to be lazily compiled and/or updated.

Modified:
    llvm/trunk/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h
    llvm/trunk/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp

Modified: llvm/trunk/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h?rev=268112&r1=268111&r2=268112&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h (original)
+++ llvm/trunk/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h Fri Apr 29 16:32:00 2016
@@ -107,6 +107,37 @@ private:
   sys::OwningMemoryBlock StubsMem;
 };
 
+class OrcAArch64 {
+public:
+  static const unsigned PointerSize = 8;
+  static const unsigned TrampolineSize = 12;
+  static const unsigned ResolverCodeSize = 0xA0; // sizeof(ResolverCode) below: 36 insts + 2 quads
+
+  typedef GenericIndirectStubsInfo<8> IndirectStubsInfo;
+
+  typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
+
+  /// @brief Write the resolver code into the given memory. The user is
+  ///        responsible for allocating the memory and setting permissions.
+  static void writeResolverCode(uint8_t *ResolverMem, JITReentryFn Reentry,
+                                void *CallbackMgr);
+
+  /// @brief Write the requested number of trampolines into the given memory,
+  ///        which must be big enough to hold 1 pointer, plus NumTrampolines
+  ///        trampolines.
+  static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
+                               unsigned NumTrampolines);
+
+  /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to
+  ///        the nearest page size.
+  ///
+  ///   E.g. Asking for 4 stubs, where stubs are 8-bytes, with 4k
+  /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
+  /// will return a block of 1024 (2-pages worth).
+  static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+                                      unsigned MinStubs, void *InitialPtrVal);
+};
+
 /// @brief X86_64 support.
 ///
 /// X86_64 supports lazy JITing.

Modified: llvm/trunk/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp?rev=268112&r1=268111&r2=268112&view=diff
==============================================================================
--- llvm/trunk/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp (original)
+++ llvm/trunk/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp Fri Apr 29 16:32:00 2016
@@ -14,6 +14,150 @@
 namespace llvm {
 namespace orc {
 
+void OrcAArch64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
+                                   void *CallbackMgr) {
+
+  const uint32_t ResolverCode[] = {
+    // resolver_entry:
+    0xa9bf47fd, // 0x00: stp  x29, x17, [sp, #-16]!  (x17 holds caller's LR, see trampoline)
+    0x910003fd, // 0x04: mov  x29, sp
+    0xa9bf73fb, // 0x08: stp  x27, x28, [sp, #-16]!
+    0xa9bf6bf9, // 0x0C: stp  x25, x26, [sp, #-16]!
+    0xa9bf63f7, // 0x10: stp  x23, x24, [sp, #-16]!
+    0xa9bf5bf5, // 0x14: stp  x21, x22, [sp, #-16]!
+    0xa9bf53f3, // 0x18: stp  x19, x20, [sp, #-16]!
+    0xa9bf3fee, // 0x1C: stp  x14, x15, [sp, #-16]!
+    0xa9bf37ec, // 0x20: stp  x12, x13, [sp, #-16]!
+    0xa9bf2fea, // 0x24: stp  x10, x11, [sp, #-16]!
+    0xa9bf27e8, // 0x28: stp   x8,  x9, [sp, #-16]!
+    0xa9bf1fe6, // 0x2C: stp   x6,  x7, [sp, #-16]!
+    0xa9bf17e4, // 0x30: stp   x4,  x5, [sp, #-16]!
+    0xa9bf0fe2, // 0x34: stp   x2,  x3, [sp, #-16]!
+    0xa9bf07e0, // 0x38: stp   x0,  x1, [sp, #-16]!
+    0x580002e0, // 0x3C: ldr   x0, Lcallback_mgr
+    0xaa1e03e1, // 0x40: mov   x1, x30
+    0xd1003021, // 0x44: sub   x1, x1, #12  (x1 = LR - TrampolineSize = trampoline addr)
+    0x58000242, // 0x48: ldr   x2, Lreentry_fn
+    0xd63f0040, // 0x4C: blr   x2  (call ReentryFn(CallbackMgr, TrampolineId))
+    0xaa0003f1, // 0x50: mov   x17, x0  (x17 = target addr returned by reentry fn)
+    0xa8c107e0, // 0x54: ldp   x0,  x1, [sp], #16
+    0xa8c10fe2, // 0x58: ldp   x2,  x3, [sp], #16
+    0xa8c117e4, // 0x5C: ldp   x4,  x5, [sp], #16
+    0xa8c11fe6, // 0x60: ldp   x6,  x7, [sp], #16
+    0xa8c127e8, // 0x64: ldp   x8,  x9, [sp], #16
+    0xa8c12fea, // 0x68: ldp  x10, x11, [sp], #16
+    0xa8c137ec, // 0x6C: ldp  x12, x13, [sp], #16
+    0xa8c13fee, // 0x70: ldp  x14, x15, [sp], #16
+    0xa8c153f3, // 0x74: ldp  x19, x20, [sp], #16
+    0xa8c15bf5, // 0x78: ldp  x21, x22, [sp], #16
+    0xa8c163f7, // 0x7C: ldp  x23, x24, [sp], #16
+    0xa8c16bf9, // 0x80: ldp  x25, x26, [sp], #16
+    0xa8c173fb, // 0x84: ldp  x27, x28, [sp], #16
+    0xa8c17bfd, // 0x88: ldp  x29, x30, [sp], #16  (restores original LR from saved x17 slot)
+    0xd65f0220, // 0x8C: ret  x17  (jump to the resolved target)
+    0x00000000, // 0x90: Lreentry_fn:
+    0x00000000, //         .quad reentry_fn
+    0x00000000, // 0x98: Lcallback_mgr:
+    0x00000000, //         .quad callback_mgr
+  };
+
+  const unsigned ReentryFnAddrOffset = 0x90;
+  const unsigned CallbackMgrAddrOffset = 0x98;
+
+  memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode));
+  memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn));
+  memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr,
+         sizeof(CallbackMgr));
+}
+
+void OrcAArch64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
+                                  unsigned NumTrampolines) {
+
+  unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
+
+  memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void *));
+
+  // OffsetToPtr is actually the offset from the PC of the 2nd instruction
+  // (the ldr below), so subtract 4 bytes (one 32-bit instruction).
+  OffsetToPtr -= 4;
+
+  uint32_t *Trampolines = reinterpret_cast<uint32_t *>(TrampolineMem);
+
+  for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
+    Trampolines[3 * I + 0] = 0xaa1e03f1;                      // mov x17, x30
+    Trampolines[3 * I + 1] = 0x58000010 | (OffsetToPtr << 3); // ldr x16, Lptr
+    Trampolines[3 * I + 2] = 0xd63f0200;                      // blr x16
+  }
+
+}
+
+Error OrcAArch64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+                                         unsigned MinStubs,
+                                         void *InitialPtrVal) {
+  // Stub format is:
+  //
+  // .section __orc_stubs
+  // stub1:
+  //                 ldr     x16, ptr1      ; PC-rel load of ptr1
+  //                 br      x16            ; Jump to resolver
+  // stub2:
+  //                 ldr     x16, ptr2      ; PC-rel load of ptr2
+  //                 br      x16            ; Jump to resolver
+  //
+  // ...
+  //
+  // .section __orc_ptrs
+  // ptr1:
+  //                 .quad 0x0
+  // ptr2:
+  //                 .quad 0x0
+  //
+  // ...
+
+  const unsigned StubSize = IndirectStubsInfo::StubSize;
+
+  // Emit at least MinStubs, rounded up to fill the pages allocated.
+  unsigned PageSize = sys::Process::getPageSize();
+  unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
+  unsigned NumStubs = (NumPages * PageSize) / StubSize;
+
+  // Allocate memory for stubs and pointers in one call.
+  std::error_code EC;
+  auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
+      2 * NumPages * PageSize, nullptr,
+      sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
+
+  if (EC)
+    return errorCodeToError(EC);
+
+  // Create separate MemoryBlocks representing the stubs and pointers.
+  sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize);
+  sys::MemoryBlock PtrsBlock(static_cast<char *>(StubsMem.base()) +
+                                 NumPages * PageSize,
+                             NumPages * PageSize);
+
+  // Populate the stubs page and then mark it executable.
+  uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlock.base());
+  uint64_t PtrOffsetField = static_cast<uint64_t>(NumPages * PageSize)
+                            << 3; // ldr-literal imm19 field: (byte offset / 4) << 5
+
+  for (unsigned I = 0; I < NumStubs; ++I)
+    Stub[I] = 0xd61f020058000010 | PtrOffsetField; // ldr x16, ptrN; br x16
+
+  if (auto EC = sys::Memory::protectMappedMemory(
+          StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC))
+    return errorCodeToError(EC);
+
+  // Initialize all pointers to point at InitialPtrVal.
+  void **Ptr = reinterpret_cast<void **>(PtrsBlock.base());
+  for (unsigned I = 0; I < NumStubs; ++I)
+    Ptr[I] = InitialPtrVal;
+
+  StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem));
+
+  return Error::success();
+}
+
 void OrcX86_64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
                                   void *CallbackMgr) {
 




More information about the llvm-commits mailing list