[llvm] r268112 - [Orc] Add ORC lazy-compilation support for AArch64.
Lang Hames via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 29 14:32:00 PDT 2016
Author: lhames
Date: Fri Apr 29 16:32:00 2016
New Revision: 268112
URL: http://llvm.org/viewvc/llvm-project?rev=268112&view=rev
Log:
[Orc] Add ORC lazy-compilation support for AArch64.
The ORC compile callbacks and indirect stubs APIs will now work for AArch64,
allowing functions to be lazily compiled and/or updated.
Modified:
llvm/trunk/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h
llvm/trunk/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp
Modified: llvm/trunk/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h?rev=268112&r1=268111&r2=268112&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h (original)
+++ llvm/trunk/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h Fri Apr 29 16:32:00 2016
@@ -107,6 +107,37 @@ private:
sys::OwningMemoryBlock StubsMem;
};
+/// @brief AArch64 support.
+///
+/// AArch64 supports lazy JITing.
+class OrcAArch64 {
+public:
+  static const unsigned PointerSize = 8;
+  static const unsigned TrampolineSize = 12;
+
+  // Size of the block written by writeResolverCode: 36 instructions
+  // (offsets 0x00-0x8C) plus two 64-bit literal slots (reentry fn at 0x90,
+  // callback manager at 0x98) = 0xA0 bytes. (Was 0x6C, which under-counted
+  // the resolver code and would cause callers sizing the resolver block from
+  // this constant to under-allocate.)
+  static const unsigned ResolverCodeSize = 0xA0;
+
+  typedef GenericIndirectStubsInfo<8> IndirectStubsInfo;
+
+  typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
+
+  /// @brief Write the resolver code into the given memory. The user is
+  /// responsible for allocating the memory and setting permissions.
+  static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,
+                                void *CallbackMgr);
+
+  /// @brief Write the requested number of trampolines into the given memory,
+  /// which must be big enough to hold 1 pointer, plus NumTrampolines
+  /// trampolines.
+  static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
+                               unsigned NumTrampolines);
+
+  /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to
+  /// the nearest page size.
+  ///
+  /// E.g. Asking for 4 stubs on AArch64, where stubs are 8-bytes, with 4k
+  /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513
+  /// will return a block of 1024 (2-pages worth).
+  static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+                                      unsigned MinStubs, void *InitialPtrVal);
+};
+
/// @brief X86_64 support.
///
/// X86_64 supports lazy JITing.
Modified: llvm/trunk/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp?rev=268112&r1=268111&r2=268112&view=diff
==============================================================================
--- llvm/trunk/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp (original)
+++ llvm/trunk/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp Fri Apr 29 16:32:00 2016
@@ -14,6 +14,150 @@
namespace llvm {
namespace orc {
+// Writes the AArch64 lazy-compilation resolver into ResolverMem.
+//
+// The resolver is entered via `blr` from a trampoline, so on entry x30 points
+// just past the trampoline's blr, and x17 holds the original caller's return
+// address (the trampoline's first instruction is `mov x17, x30` -- see
+// writeTrampolines below). The resolver spills the integer argument/temporary
+// registers, calls ReentryFn(CallbackMgr, TrampolineAddr) to resolve/compile
+// the real target, restores the registers (restoring the saved x17 into x30
+// so the target returns to the original caller), and jumps to the address
+// ReentryFn returned.
+//
+// NOTE(review): only general-purpose registers are saved/restored here; the
+// floating-point/vector argument registers (v0-v7) are not preserved across
+// the reentry call -- confirm this is acceptable for lazily-compiled
+// functions taking FP arguments.
+void OrcAArch64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
+                                   void *CallbackMgr) {
+
+  const uint32_t ResolverCode[] = {
+    // resolver_entry:
+    0xa9bf47fd, // 0x00: stp x29, x17, [sp, #-16]!  (x17 = caller's ret addr)
+    0x910003fd, // 0x04: mov x29, sp
+    0xa9bf73fb, // 0x08: stp x27, x28, [sp, #-16]!
+    0xa9bf6bf9, // 0x0C: stp x25, x26, [sp, #-16]!
+    0xa9bf63f7, // 0x10: stp x23, x24, [sp, #-16]!
+    0xa9bf5bf5, // 0x14: stp x21, x22, [sp, #-16]!
+    0xa9bf53f3, // 0x18: stp x19, x20, [sp, #-16]!
+    0xa9bf3fee, // 0x1C: stp x14, x15, [sp, #-16]!
+    0xa9bf37ec, // 0x20: stp x12, x13, [sp, #-16]!
+    0xa9bf2fea, // 0x24: stp x10, x11, [sp, #-16]!
+    0xa9bf27e8, // 0x28: stp x8, x9, [sp, #-16]!
+    0xa9bf1fe6, // 0x2C: stp x6, x7, [sp, #-16]!
+    0xa9bf17e4, // 0x30: stp x4, x5, [sp, #-16]!
+    0xa9bf0fe2, // 0x34: stp x2, x3, [sp, #-16]!
+    0xa9bf07e0, // 0x38: stp x0, x1, [sp, #-16]!
+    0x580002e0, // 0x3C: ldr x0, Lcallback_mgr   (PC-rel: 0x3C + 0x5C = 0x98)
+    0xaa1e03e1, // 0x40: mov x1, x30             (x30 = trampoline + 12)
+    0xd1003021, // 0x44: sub x1, x1, #12         (back up to trampoline start)
+    0x58000242, // 0x48: ldr x2, Lreentry_fn     (PC-rel: 0x48 + 0x48 = 0x90)
+    0xd63f0040, // 0x4C: blr x2                  (x0 = resolved target addr)
+    0xaa0003f1, // 0x50: mov x17, x0
+    0xa8c107e0, // 0x54: ldp x0, x1, [sp], #16
+    0xa8c10fe2, // 0x58: ldp x2, x3, [sp], #16
+    0xa8c117e4, // 0x5C: ldp x4, x5, [sp], #16
+    0xa8c11fe6, // 0x60: ldp x6, x7, [sp], #16
+    0xa8c127e8, // 0x64: ldp x8, x9, [sp], #16
+    0xa8c12fea, // 0x68: ldp x10, x11, [sp], #16
+    0xa8c137ec, // 0x6C: ldp x12, x13, [sp], #16
+    0xa8c13fee, // 0x70: ldp x14, x15, [sp], #16
+    0xa8c153f3, // 0x74: ldp x19, x20, [sp], #16
+    0xa8c15bf5, // 0x78: ldp x21, x22, [sp], #16
+    0xa8c163f7, // 0x7C: ldp x23, x24, [sp], #16
+    0xa8c16bf9, // 0x80: ldp x25, x26, [sp], #16
+    0xa8c173fb, // 0x84: ldp x27, x28, [sp], #16
+    0xa8c17bfd, // 0x88: ldp x29, x30, [sp], #16 (x30 <- saved x17: caller's
+                //                                ret addr, paired at 0x00)
+    0xd65f0220, // 0x8C: ret x17                 (jump to resolved target)
+    0x00000000, // 0x90: Lreentry_fn:
+    0x00000000, //       .quad 0  (patched with ReentryFn below)
+    0x00000000, // 0x98: Lcallback_mgr:
+    0x00000000, //       .quad 0  (patched with CallbackMgr below)
+  };
+
+  // Byte offsets of the two 64-bit literal slots patched in below; they must
+  // match the ldr-literal targets at 0x48 and 0x3C respectively.
+  const unsigned ReentryFnAddrOffset = 0x90;
+  const unsigned CallbackMgrAddrOffset = 0x98;
+
+  memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode));
+  memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn));
+  memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr,
+         sizeof(CallbackMgr));
+}
+
+// Writes NumTrampolines 12-byte (3-instruction) trampolines into
+// TrampolineMem, followed by a single 8-byte-aligned pointer slot holding
+// ResolverAddr. Each trampoline stashes the caller's return address in x17,
+// loads the shared resolver pointer PC-relatively, and calls it, so the
+// resolver sees x30 = (trampoline start + 12) and identifies the trampoline
+// from it.
+void OrcAArch64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
+                                  unsigned NumTrampolines) {
+
+  // The resolver pointer lives just past the last trampoline, 8-byte aligned.
+  unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
+
+  memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void *));
+
+  // OffsetToPtr is actually the offset from the PC for the 2nd instruction, so
+  // subtract 32-bits.
+  OffsetToPtr -= 4;
+
+  uint32_t *Trampolines = reinterpret_cast<uint32_t *>(TrampolineMem);
+
+  for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
+    Trampolines[3 * I + 0] = 0xaa1e03f1;                      // mov x17, x30
+    // ldr (literal) encodes a signed word offset in imm19 starting at bit 5,
+    // so the byte offset is shifted by 3: (Offset / 4) << 5 == Offset << 3.
+    // (The comment previously said "mov"; 0x58000010 is `ldr x16, Lptr`.)
+    Trampolines[3 * I + 1] = 0x58000010 | (OffsetToPtr << 3); // ldr x16, Lptr
+    Trampolines[3 * I + 2] = 0xd63f0200;                      // blr x16
+  }
+
+}
+
+// Allocates and populates a block of AArch64 indirect stubs.
+//
+// Returns (via StubsInfo) at least MinStubs stubs, rounded up to fill whole
+// pages. The mapping is laid out as NumPages pages of 8-byte stubs followed
+// by NumPages pages of 8-byte pointer slots: stub K loads pointer K
+// PC-relatively and branches through it, so retargeting a stub is a single
+// pointer write. All pointers start at InitialPtrVal.
+Error OrcAArch64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+                                         unsigned MinStubs,
+                                         void *InitialPtrVal) {
+  // Stub format is:
+  //
+  // .section __orc_stubs
+  // stub1:
+  //        ldr     x16, ptr1       ; PC-rel load of ptr1
+  //        br      x16             ; Jump through ptr1
+  // stub2:
+  //        ldr     x16, ptr2       ; PC-rel load of ptr2
+  //        br      x16             ; Jump through ptr2
+  //
+  // ...
+  //
+  // .section __orc_ptrs
+  // ptr1:
+  //        .quad 0x0
+  // ptr2:
+  //        .quad 0x0
+  //
+  // ...
+
+  const unsigned StubSize = IndirectStubsInfo::StubSize;
+
+  // Emit at least MinStubs, rounded up to fill the pages allocated.
+  unsigned PageSize = sys::Process::getPageSize();
+  unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
+  unsigned NumStubs = (NumPages * PageSize) / StubSize;
+
+  // Allocate memory for stubs and pointers in one call.
+  std::error_code EC;
+  auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
+      2 * NumPages * PageSize, nullptr,
+      sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
+
+  if (EC)
+    return errorCodeToError(EC);
+
+  // Create separate MemoryBlocks representing the stubs and pointers.
+  sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize);
+  sys::MemoryBlock PtrsBlock(static_cast<char *>(StubsMem.base()) +
+                                 NumPages * PageSize,
+                             NumPages * PageSize);
+
+  // Populate the stubs page stubs and mark it executable. Each stub is one
+  // little-endian 64-bit word: low 32 bits 0x58000010 = `ldr x16, <ptr>`,
+  // high 32 bits 0xd61f0200 = `br x16`. Every stub sits exactly
+  // NumPages * PageSize bytes before its pointer slot, and that byte offset
+  // goes into ldr-literal's imm19 word-offset field at bit 5 -- hence the
+  // single shared `<< 3` ((Offset / 4) << 5).
+  uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlock.base());
+  uint64_t PtrOffsetField = static_cast<uint64_t>(NumPages * PageSize)
+                            << 3;
+
+  for (unsigned I = 0; I < NumStubs; ++I)
+    Stub[I] = 0xd61f020058000010 | PtrOffsetField;
+
+  if (auto EC = sys::Memory::protectMappedMemory(
+          StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC))
+    return errorCodeToError(EC);
+
+  // Initialize all pointers to point at FailureAddress.
+  void **Ptr = reinterpret_cast<void **>(PtrsBlock.base());
+  for (unsigned I = 0; I < NumStubs; ++I)
+    Ptr[I] = InitialPtrVal;
+
+  StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem));
+
+  return Error::success();
+}
+
void OrcX86_64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
void *CallbackMgr) {
More information about the llvm-commits
mailing list