[llvm] 8daf4f1 - [ORC][ORC-RT] Add ORC-RT based lazy compilation support for x86-64.
Lang Hames via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 15 15:50:39 PST 2024
Author: Lang Hames
Date: 2024-12-15T23:50:31Z
New Revision: 8daf4f16fa08b5d876e98108721dd1743a360326
URL: https://github.com/llvm/llvm-project/commit/8daf4f16fa08b5d876e98108721dd1743a360326
DIFF: https://github.com/llvm/llvm-project/commit/8daf4f16fa08b5d876e98108721dd1743a360326.diff
LOG: [ORC][ORC-RT] Add ORC-RT based lazy compilation support for x86-64.
Adds support for the ORC-RT based lazy compilation scheme that was introduced
in 570ecdcf8b4.
Added:
compiler-rt/lib/orc/sysv_reenter.x86-64.S
Modified:
compiler-rt/lib/orc/CMakeLists.txt
compiler-rt/test/orc/TestCases/Generic/lazy-link.ll
llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
llvm/lib/ExecutionEngine/Orc/JITLinkReentryTrampolines.cpp
Removed:
################################################################################
diff --git a/compiler-rt/lib/orc/CMakeLists.txt b/compiler-rt/lib/orc/CMakeLists.txt
index c95700ab53876c..7da230d8296e90 100644
--- a/compiler-rt/lib/orc/CMakeLists.txt
+++ b/compiler-rt/lib/orc/CMakeLists.txt
@@ -53,6 +53,7 @@ if (APPLE)
macho_tlv.x86-64.S
macho_tlv.arm64.S
sysv_reenter.arm64.S
+ sysv_reenter.x86-64.S
)
set(ORC_IMPL_HEADERS
@@ -119,6 +120,7 @@ else() # not Apple
elfnix_tls.aarch64.S
elfnix_tls.ppc64.S
sysv_reenter.arm64.S
+ sysv_reenter.x86-64.S
)
endif()
diff --git a/compiler-rt/lib/orc/sysv_reenter.x86-64.S b/compiler-rt/lib/orc/sysv_reenter.x86-64.S
new file mode 100644
index 00000000000000..0a36280f1d1f85
--- /dev/null
+++ b/compiler-rt/lib/orc/sysv_reenter.x86-64.S
@@ -0,0 +1,81 @@
+//===-- sysv_reenter.x86-64.S -----------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of the ORC runtime support library.
+//
+//===----------------------------------------------------------------------===//
+
+// The content of this file is x86_64-only
+#if defined(__x86_64__)
+
+// Save all GPRs except %rsp.
+// This value is also subtracted from %rsp below, despite the fact that %rbp
+// has already been pushed, because we need %rsp to stay 16-byte aligned.
+#define GPR_SAVE_SPACE_SIZE 15 * 8
+#define FXSAVE64_SAVE_SPACE_SIZE 512
+#define REGISTER_SAVE_SPACE_SIZE \
+ GPR_SAVE_SPACE_SIZE + FXSAVE64_SAVE_SPACE_SIZE
+
+ .text
+
+ // Entered from a reentry trampoline's call. Saves all registers, calls
+ // __orc_rt_resolve to look up the implementation for the calling stub,
+ // then restores all registers and "returns" into that implementation.
+ .globl __orc_rt_sysv_reenter
+__orc_rt_sysv_reenter:
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $REGISTER_SAVE_SPACE_SIZE, %rsp
+ movq %rax, -8(%rbp)
+ movq %rbx, -16(%rbp)
+ movq %rcx, -24(%rbp)
+ movq %rdx, -32(%rbp)
+ movq %rsi, -40(%rbp)
+ movq %rdi, -48(%rbp)
+ movq %r8, -56(%rbp)
+ movq %r9, -64(%rbp)
+ movq %r10, -72(%rbp)
+ movq %r11, -80(%rbp)
+ movq %r12, -88(%rbp)
+ movq %r13, -96(%rbp)
+ movq %r14, -104(%rbp)
+ movq %r15, -112(%rbp)
+ fxsave64 (%rsp)
+ movq 8(%rbp), %rdi
+
+ // The return address loaded above points just past the stub's call
+ // instruction; subtract the length of a call rel32 (5 bytes) to recover
+ // the address of the calling stub.
+ subq $5, %rdi
+
+ // Call __orc_rt_resolve to look up the implementation corresponding to
+ // the calling stub, then store the result into the return-address slot
+ // at 8(%rbp) so that the ret below transfers control to it.
+#if !defined(__APPLE__)
+ call __orc_rt_resolve
+#else
+ call ___orc_rt_resolve
+#endif
+ movq %rax, 8(%rbp)
+ fxrstor64 (%rsp)
+ movq -112(%rbp), %r15
+ movq -104(%rbp), %r14
+ movq -96(%rbp), %r13
+ movq -88(%rbp), %r12
+ movq -80(%rbp), %r11
+ movq -72(%rbp), %r10
+ movq -64(%rbp), %r9
+ movq -56(%rbp), %r8
+ movq -48(%rbp), %rdi
+ movq -40(%rbp), %rsi
+ movq -32(%rbp), %rdx
+ movq -24(%rbp), %rcx
+ movq -16(%rbp), %rbx
+ movq -8(%rbp), %rax
+ addq $REGISTER_SAVE_SPACE_SIZE, %rsp
+ popq %rbp
+ ret
+
+#endif // defined(__x86_64__)
diff --git a/compiler-rt/test/orc/TestCases/Generic/lazy-link.ll b/compiler-rt/test/orc/TestCases/Generic/lazy-link.ll
index e722b813c3fe06..5a8dbfc532b0fe 100644
--- a/compiler-rt/test/orc/TestCases/Generic/lazy-link.ll
+++ b/compiler-rt/test/orc/TestCases/Generic/lazy-link.ll
@@ -11,7 +11,7 @@
; RUN: -lazy %t/x.o | FileCheck %s
;
; UNSUPPORTED: system-windows
-; REQUIRES: target={{(arm|aarch)64.*}}
+; REQUIRES: target={{(arm|aarch|x86_)64.*}}
;
; CHECK: Linking {{.*}}main.o
; CHECK-DAG: Linking <indirect stubs graph #1>
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
index 0d7e0fdb5820b5..356b8cd70aec51 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
@@ -641,6 +641,31 @@ inline Symbol &createAnonymousPointerJumpStub(LinkGraph &G,
false);
}
+/// x86-64 reentry trampoline.
+///
+/// Contains the instruction sequence for a trampoline that stores its return
+/// address on the stack and calls <reentry-symbol>:
+/// call <reentry-symbol>
+extern const char ReentryTrampolineContent[5];
+
+/// Create a block of N reentry trampolines.
+inline Block &createReentryTrampolineBlock(LinkGraph &G,
+ Section &TrampolineSection,
+ Symbol &ReentrySymbol) {
+ auto &B = G.createContentBlock(TrampolineSection, ReentryTrampolineContent,
+ orc::ExecutorAddr(~uint64_t(7)), 1, 0);
+ B.addEdge(BranchPCRel32, 1, ReentrySymbol, 0);
+ return B;
+}
+
+inline Symbol &createAnonymousReentryTrampoline(LinkGraph &G,
+ Section &TrampolineSection,
+ Symbol &ReentrySymbol) {
+ return G.addAnonymousSymbol(
+ createReentryTrampolineBlock(G, TrampolineSection, ReentrySymbol), 0,
+ sizeof(ReentryTrampolineContent), true, false);
+}
+
/// Global Offset Table Builder.
class GOTTableManager : public TableManager<GOTTableManager> {
public:
diff --git a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
index e5b48d2c3fab0e..a84e0001f115a8 100644
--- a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
@@ -85,6 +85,10 @@ const char NullPointerContent[PointerSize] = {0x00, 0x00, 0x00, 0x00,
const char PointerJumpStubContent[6] = {
static_cast<char>(0xFFu), 0x25, 0x00, 0x00, 0x00, 0x00};
+const char ReentryTrampolineContent[5] = {
+ static_cast<char>(0xe8), 0x00, 0x00, 0x00, 0x00
+};
+
Error optimizeGOTAndStubAccesses(LinkGraph &G) {
LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n");
diff --git a/llvm/lib/ExecutionEngine/Orc/JITLinkReentryTrampolines.cpp b/llvm/lib/ExecutionEngine/Orc/JITLinkReentryTrampolines.cpp
index 834292c165095a..be574ef7279c27 100644
--- a/llvm/lib/ExecutionEngine/Orc/JITLinkReentryTrampolines.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/JITLinkReentryTrampolines.cpp
@@ -9,6 +9,7 @@
#include "llvm/ExecutionEngine/Orc/JITLinkReentryTrampolines.h"
#include "llvm/ExecutionEngine/JITLink/aarch64.h"
+#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include <memory>
@@ -91,6 +92,9 @@ JITLinkReentryTrampolines::Create(ObjectLinkingLayer &ObjLinkingLayer) {
case Triple::aarch64:
EmitTrampoline = aarch64::createAnonymousReentryTrampoline;
break;
+ case Triple::x86_64:
+ EmitTrampoline = x86_64::createAnonymousReentryTrampoline;
+ break;
default:
return make_error<StringError>("JITLinkReentryTrampolines: architecture " +
TT.getArchName() + " not supported",
More information about the llvm-commits
mailing list