[llvm] acb408f - [ORC] add lazy jit support for riscv64
Alex Fan via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 10 03:44:56 PDT 2022
Author: Alex Fan
Date: 2022-04-10T18:44:50+08:00
New Revision: acb408fbbc9d77b75c8ed2f36805f59e6a640e2b
URL: https://github.com/llvm/llvm-project/commit/acb408fbbc9d77b75c8ed2f36805f59e6a640e2b
DIFF: https://github.com/llvm/llvm-project/commit/acb408fbbc9d77b75c8ed2f36805f59e6a640e2b.diff
LOG: [ORC] add lazy jit support for riscv64
This adds resolver, indirection and trampoline stubs for riscv64,
allowing lazy compilation to work.
It assumes the hard-float extensions exist. I don't know the proper way to detect them, as Triple doesn't provide an interface for checking the RISC-V +f/+d ABI.
I am also not sure whether the orclazy tests should be enabled, because lli needs an additional -codemodel=medany for the tests to pass.
Reviewed By: lhames
Differential Revision: https://reviews.llvm.org/D122543
Added:
Modified:
llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
index 82dfdc2701287..c5c2780bc9ee5 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
@@ -330,6 +330,45 @@ class OrcMips64 {
JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
};
+// @brief riscv64 support.
+//
+// RISC-V 64 supports lazy JITing.
+class OrcRiscv64 {
+public:
+  /// Size in bytes of one pointer slot (resolver / stub target address).
+  static constexpr unsigned PointerSize = 8;
+  /// Size in bytes of one trampoline: three instructions plus one padding
+  /// word.
+  static constexpr unsigned TrampolineSize = 16;
+  /// Size in bytes of one indirect stub: three instructions plus one padding
+  /// word.
+  static constexpr unsigned StubSize = 16;
+  /// Written with an unsigned literal so the shift never touches the sign bit
+  /// of a 32-bit int.
+  static constexpr unsigned StubToPointerMaxDisplacement = 1U << 31;
+  /// Size in bytes of the code block emitted by writeResolverCode, including
+  /// the two 8-byte literal slots at its end.
+  static constexpr unsigned ResolverCodeSize = 0x148;
+
+  /// Write the resolver code into the given memory. The user is
+  /// responsible for allocating the memory and setting permissions.
+  ///
+  /// ReentryFnAddr should be the address of a function whose signature matches
+  /// void* (*)(void *ReentryCtxAddr, void *TrampolineAddr). The ReentryCtxAddr
+  /// argument of writeResolverCode will be passed as the first argument to
+  /// the function at ReentryFnAddr (in a0), and the address of the trampoline
+  /// that was entered as the second (in a1). The resolver then jumps to the
+  /// address returned by that function.
+  static void writeResolverCode(char *ResolverWorkingMem,
+                                JITTargetAddress ResolverTargetAddress,
+                                JITTargetAddress ReentryFnAddr,
+                                JITTargetAddress ReentryCtxAddr);
+
+  /// Write the requested number of trampolines into the given memory,
+  /// which must be big enough to hold 1 pointer, plus NumTrampolines
+  /// trampolines. The pointer (ResolverFnAddr) is stored immediately after
+  /// the last trampoline and is loaded PC-relatively by every trampoline.
+  static void writeTrampolines(char *TrampolineBlockWorkingMem,
+                               JITTargetAddress TrampolineBlockTargetAddress,
+                               JITTargetAddress ResolverFnAddr,
+                               unsigned NumTrampolines);
+
+  /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem.
+  /// Stubs will be written as if linked at StubsBlockTargetAddress, with the
+  /// Nth stub using the Nth pointer in memory starting at
+  /// PointersBlockTargetAddress.
+  static void writeIndirectStubsBlock(
+      char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
+      JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
+};
+
} // end namespace orc
} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
index 63de9cdfa02ea..eb555b4ddb8b6 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
@@ -261,6 +261,9 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) {
case Triple::mips64el:
return CreateWithABI<OrcMips64>(EPC);
+ case Triple::riscv64:
+ return CreateWithABI<OrcRiscv64>(EPC);
+
case Triple::x86_64:
if (TT.getOS() == Triple::OSType::Win32)
return CreateWithABI<OrcX86_64_Win32>(EPC);
diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 02e121cd9a4a0..a3942b1066858 100644
--- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -152,6 +152,11 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
return CCMgrT::Create(ES, ErrorHandlerAddress);
}
+ case Triple::riscv64: {
+ typedef orc::LocalJITCompileCallbackManager<orc::OrcRiscv64> CCMgrT;
+ return CCMgrT::Create(ES, ErrorHandlerAddress);
+ }
+
case Triple::x86_64: {
if (T.getOS() == Triple::OSType::Win32) {
typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_Win32> CCMgrT;
@@ -206,6 +211,12 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) {
orc::LocalIndirectStubsManager<orc::OrcMips64>>();
};
+ case Triple::riscv64:
+ return []() {
+ return std::make_unique<
+ orc::LocalIndirectStubsManager<orc::OrcRiscv64>>();
+ };
+
case Triple::x86_64:
if (T.getOS() == Triple::OSType::Win32) {
return [](){
diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
index 66453e6a632fc..20b655bdf4b10 100644
--- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -131,6 +131,10 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
case Triple::mips64el:
return LocalLazyCallThroughManager::Create<OrcMips64>(ES, ErrorHandlerAddr);
+ case Triple::riscv64:
+ return LocalLazyCallThroughManager::Create<OrcRiscv64>(ES,
+ ErrorHandlerAddr);
+
case Triple::x86_64:
if (T.getOS() == Triple::OSType::Win32)
return LocalLazyCallThroughManager::Create<OrcX86_64_Win32>(
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
index 18b3c5e12b1c2..ef764a3f0d7fe 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
@@ -906,5 +906,176 @@ void OrcMips64::writeIndirectStubsBlock(
Stub[8 * I + 7] = 0x00000000; // nop
}
}
+
+/// Emit the riscv64 resolver into ResolverWorkingMem.
+///
+/// The resolver is entered from a trampoline with the trampoline's return
+/// address in t1. It spills the integer registers s0-s11, ra, a0-a7 and the
+/// FP registers fs0-fs11, calls ReentryFn(ReentryCtx, TrampolineAddr), reloads
+/// everything it spilled and finally jumps to the address ReentryFn returned.
+///
+/// \param ResolverWorkingMem    Buffer of at least ResolverCodeSize bytes.
+/// \param ResolverTargetAddress Unused here: the code only uses PC-relative
+///                              addressing.
+/// \param ReentryFnAddr         Patched into the Lreentry_fn_ptr literal.
+/// \param ReentryCtxAddr        Patched into the Lreentry_ctx_ptr literal.
+void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem,
+                                   JITTargetAddress ResolverTargetAddress,
+                                   JITTargetAddress ReentryFnAddr,
+                                   JITTargetAddress ReentryCtxAddr) {
+
+  // The save area needs 264 bytes (33 slots of 8 bytes). The frame is rounded
+  // up to 272 so that sp stays 16-byte aligned across the call at 0xa0, as the
+  // RISC-V calling convention requires; the top 8 bytes are unused padding.
+  //
+  // NOTE(review): only the callee-saved fs0-fs11 are spilled; the FP argument
+  // registers fa0-fa7 are not. Confirm the reentry path cannot clobber them
+  // before the lazily compiled function body runs.
+  const uint32_t ResolverCode[] = {
+      0xef010113, // 0x00: addi sp,sp,-272
+      0x00813023, // 0x04: sd s0,0(sp)
+      0x00913423, // 0x08: sd s1,8(sp)
+      0x01213823, // 0x0c: sd s2,16(sp)
+      0x01313c23, // 0x10: sd s3,24(sp)
+      0x03413023, // 0x14: sd s4,32(sp)
+      0x03513423, // 0x18: sd s5,40(sp)
+      0x03613823, // 0x1c: sd s6,48(sp)
+      0x03713c23, // 0x20: sd s7,56(sp)
+      0x05813023, // 0x24: sd s8,64(sp)
+      0x05913423, // 0x28: sd s9,72(sp)
+      0x05a13823, // 0x2c: sd s10,80(sp)
+      0x05b13c23, // 0x30: sd s11,88(sp)
+      0x06113023, // 0x34: sd ra,96(sp)
+      0x06a13423, // 0x38: sd a0,104(sp)
+      0x06b13823, // 0x3c: sd a1,112(sp)
+      0x06c13c23, // 0x40: sd a2,120(sp)
+      0x08d13023, // 0x44: sd a3,128(sp)
+      0x08e13423, // 0x48: sd a4,136(sp)
+      0x08f13823, // 0x4c: sd a5,144(sp)
+      0x09013c23, // 0x50: sd a6,152(sp)
+      0x0b113023, // 0x54: sd a7,160(sp)
+      0x0a813427, // 0x58: fsd fs0,168(sp)
+      0x0a913827, // 0x5c: fsd fs1,176(sp)
+      0x0b213c27, // 0x60: fsd fs2,184(sp)
+      0x0d313027, // 0x64: fsd fs3,192(sp)
+      0x0d413427, // 0x68: fsd fs4,200(sp)
+      0x0d513827, // 0x6c: fsd fs5,208(sp)
+      0x0d613c27, // 0x70: fsd fs6,216(sp)
+      0x0f713027, // 0x74: fsd fs7,224(sp)
+      0x0f813427, // 0x78: fsd fs8,232(sp)
+      0x0f913827, // 0x7c: fsd fs9,240(sp)
+      0x0fa13c27, // 0x80: fsd fs10,248(sp)
+      0x11b13027, // 0x84: fsd fs11,256(sp)
+      0x00000517, // 0x88: auipc a0,0x0
+      0x0b053503, // 0x8c: ld a0,176(a0) # 0x138
+      0x00030593, // 0x90: mv a1,t1
+      0xff458593, // 0x94: addi a1,a1,-12
+      0x00000617, // 0x98: auipc a2,0x0
+      0x0a863603, // 0x9c: ld a2,168(a2) # 0x140
+      0x000600e7, // 0xa0: jalr a2
+      0x00050293, // 0xa4: mv t0,a0
+      0x00013403, // 0xa8: ld s0,0(sp)
+      0x00813483, // 0xac: ld s1,8(sp)
+      0x01013903, // 0xb0: ld s2,16(sp)
+      0x01813983, // 0xb4: ld s3,24(sp)
+      0x02013a03, // 0xb8: ld s4,32(sp)
+      0x02813a83, // 0xbc: ld s5,40(sp)
+      0x03013b03, // 0xc0: ld s6,48(sp)
+      0x03813b83, // 0xc4: ld s7,56(sp)
+      0x04013c03, // 0xc8: ld s8,64(sp)
+      0x04813c83, // 0xcc: ld s9,72(sp)
+      0x05013d03, // 0xd0: ld s10,80(sp)
+      0x05813d83, // 0xd4: ld s11,88(sp)
+      0x06013083, // 0xd8: ld ra,96(sp)
+      0x06813503, // 0xdc: ld a0,104(sp)
+      0x07013583, // 0xe0: ld a1,112(sp)
+      0x07813603, // 0xe4: ld a2,120(sp)
+      0x08013683, // 0xe8: ld a3,128(sp)
+      0x08813703, // 0xec: ld a4,136(sp)
+      0x09013783, // 0xf0: ld a5,144(sp)
+      0x09813803, // 0xf4: ld a6,152(sp)
+      0x0a013883, // 0xf8: ld a7,160(sp)
+      0x0a813407, // 0xfc: fld fs0,168(sp)
+      0x0b013487, // 0x100: fld fs1,176(sp)
+      0x0b813907, // 0x104: fld fs2,184(sp)
+      0x0c013987, // 0x108: fld fs3,192(sp)
+      0x0c813a07, // 0x10c: fld fs4,200(sp)
+      0x0d013a87, // 0x110: fld fs5,208(sp)
+      0x0d813b07, // 0x114: fld fs6,216(sp)
+      0x0e013b87, // 0x118: fld fs7,224(sp)
+      0x0e813c07, // 0x11c: fld fs8,232(sp)
+      0x0f013c87, // 0x120: fld fs9,240(sp)
+      0x0f813d07, // 0x124: fld fs10,248(sp)
+      0x10013d87, // 0x128: fld fs11,256(sp)
+      0x11010113, // 0x12c: addi sp,sp,272
+      0x00028067, // 0x130: jr t0
+      0x12345678, // 0x134: padding to align at 8 byte
+      0x12345678, // 0x138: Lreentry_ctx_ptr:
+      0xdeadbeef, // 0x13c:      .quad 0 (placeholder, patched below)
+      0x98765432, // 0x140: Lreentry_fn_ptr:
+      0xcafef00d  // 0x144:      .quad 0 (placeholder, patched below)
+  };
+
+  // Byte offsets of the two 8-byte literals that the auipc/ld pairs at 0x88
+  // and 0x98 read.
+  const unsigned ReentryCtxAddrOffset = 0x138;
+  const unsigned ReentryFnAddrOffset = 0x140;
+
+  static_assert(sizeof(ResolverCode) == ResolverCodeSize,
+                "ResolverCode must be exactly ResolverCodeSize bytes");
+  static_assert(ReentryCtxAddrOffset + sizeof(uint64_t) ==
+                        ReentryFnAddrOffset &&
+                    ReentryFnAddrOffset + sizeof(uint64_t) ==
+                        sizeof(ResolverCode),
+                "literal slots must be the last 16 bytes of ResolverCode");
+
+  // Copy the template first, then overwrite the placeholder literals with the
+  // real addresses.
+  memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
+  memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
+         sizeof(uint64_t));
+  memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
+         sizeof(uint64_t));
+}
+
+/// Fill TrampolineBlockWorkingMem with NumTrampolines 16-byte trampolines
+/// followed by one 8-byte slot holding ResolverAddr. Every trampoline loads
+/// that slot PC-relatively and jumps to it, leaving its own return address in
+/// t1. TrampolineBlockTargetAddress is not consulted because all addressing
+/// is relative to the trampoline itself.
+void OrcRiscv64::writeTrampolines(char *TrampolineBlockWorkingMem,
+                                  JITTargetAddress TrampolineBlockTargetAddress,
+                                  JITTargetAddress ResolverAddr,
+                                  unsigned NumTrampolines) {
+
+  // Byte offset, from the start of the block, of the resolver-address slot.
+  const unsigned ResolverSlotOffset =
+      alignTo(NumTrampolines * TrampolineSize, 8);
+  memcpy(TrampolineBlockWorkingMem + ResolverSlotOffset, &ResolverAddr,
+         sizeof(uint64_t));
+
+  uint32_t *Cursor = reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
+
+  // Distance from the trampoline being emitted to the slot; it shrinks by one
+  // trampoline per iteration.
+  unsigned DistanceToSlot = ResolverSlotOffset;
+  for (unsigned Idx = 0; Idx != NumTrampolines; ++Idx) {
+    // Split the distance into the auipc upper part and the signed 12-bit
+    // remainder consumed by the load; +0x800 compensates for the load's
+    // sign extension.
+    const uint32_t Upper = (DistanceToSlot + 0x800) & 0xFFFFF000;
+    const uint32_t Lower = DistanceToSlot - Upper;
+
+    *Cursor++ = 0x00000297 | Upper;                    // auipc t0, %hi(Lptr)
+    *Cursor++ = 0x0002b283 | ((Lower & 0xFFF) << 20);  // ld t0, %lo(Lptr)(t0)
+    *Cursor++ = 0x00028367;                            // jalr t1, t0
+    *Cursor++ = 0xdeadface;                            // padding
+
+    DistanceToSlot -= TrampolineSize;
+  }
+}
+
+/// Emit NumStubs indirect stubs into StubsBlockWorkingMem. Stub N is encoded
+/// as if it lived at StubsBlockTargetAddress + N * StubSize and jumps through
+/// the pointer at PointersBlockTargetAddress + N * PointerSize.
+void OrcRiscv64::writeIndirectStubsBlock(
+    char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
+    JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) {
+  // Layout produced:
+  //
+  // .section __orc_stubs
+  // stub1:
+  //         auipc t0, %hi(ptr1)     ; PC-relative load of ptr1
+  //         ld    t0, %lo(ptr1)(t0)
+  //         jr    t0                ; jump to whatever ptr1 holds
+  //         .word 0xfeedbeef        ; pad the stub to 16 bytes
+  // stub2:
+  //         auipc t0, %hi(ptr2)     ; PC-relative load of ptr2
+  //         ld    t0, %lo(ptr2)(t0)
+  //         jr    t0
+  //         .word 0xfeedbeef
+  //
+  // ...
+  //
+  // .section __orc_ptrs
+  // ptr1:
+  //         .quad 0x0
+  // ptr2:
+  //         .quad 0x0
+  //
+  // ...
+
+  assert(stubAndPointerRangesOk<OrcRiscv64>(
+             StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
+         "PointersBlock is out of range");
+
+  uint32_t *Cursor = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
+
+  for (unsigned Idx = 0; Idx != NumStubs; ++Idx) {
+    // Addresses of this stub and of the pointer it dereferences.
+    const uint64_t PtrAddr =
+        PointersBlockTargetAddress + static_cast<uint64_t>(Idx) * PointerSize;
+    const uint64_t StubAddr =
+        StubsBlockTargetAddress + static_cast<uint64_t>(Idx) * StubSize;
+    const uint64_t Delta = PtrAddr - StubAddr;
+
+    // auipc takes the upper 20 bits, the load the signed low 12; +0x800
+    // compensates for the load's sign extension.
+    const uint32_t Upper = (Delta + 0x800) & 0xFFFFF000;
+    const uint32_t Lower = Delta - Upper;
+
+    *Cursor++ = 0x00000297 | Upper;                    // auipc t0, %hi(Lptr)
+    *Cursor++ = 0x0002b283 | ((Lower & 0xFFF) << 20);  // ld t0, %lo(Lptr)(t0)
+    *Cursor++ = 0x00028067;                            // jr t0
+    *Cursor++ = 0xfeedbeef;                            // padding
+  }
+}
+
} // End namespace orc.
} // End namespace llvm.
More information about the llvm-commits
mailing list