[llvm] acb408f - [ORC] add lazy jit support for riscv64

Alex Fan via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 10 03:44:56 PDT 2022


Author: Alex Fan
Date: 2022-04-10T18:44:50+08:00
New Revision: acb408fbbc9d77b75c8ed2f36805f59e6a640e2b

URL: https://github.com/llvm/llvm-project/commit/acb408fbbc9d77b75c8ed2f36805f59e6a640e2b
DIFF: https://github.com/llvm/llvm-project/commit/acb408fbbc9d77b75c8ed2f36805f59e6a640e2b.diff

LOG: [ORC] add lazy jit support for riscv64

This adds resolver, indirection and trampoline stubs for riscv64,
allowing lazy compilation to work.

It assumes the hard-float (F and D) extensions are present. I don't know the proper way to detect this, as Triple doesn't provide an interface to check for the RISC-V +f/+d ABI.

I am also not sure whether the orclazy tests should be enabled, because lli needs an additional -codemodel=medany for the tests to pass.

Reviewed By: lhames

Differential Revision: https://reviews.llvm.org/D122543

Added: 
    

Modified: 
    llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
    llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
    llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
    llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
    llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
index 82dfdc2701287..c5c2780bc9ee5 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
@@ -330,6 +330,45 @@ class OrcMips64 {
       JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
 };
 
+// @brief riscv64 support.
+// Sizes below are in bytes; each trampoline and stub is four 32-bit words.
+// RISC-V 64 supports lazy JITing. The resolver uses fsd/fld (D extension).
+class OrcRiscv64 {
+public:
+  static constexpr unsigned PointerSize = 8;
+  static constexpr unsigned TrampolineSize = 16;
+  static constexpr unsigned StubSize = 16;
+  static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31;
+  static constexpr unsigned ResolverCodeSize = 0x148;
+
+  /// Write the resolver code into the given memory. The user is
+  /// responsible for allocating the memory and setting permissions.
+  /// ResolverWorkingMem must have room for ResolverCodeSize bytes.
+  /// ReentryFnAddr should be the address of a function whose signature matches
+  /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr
+  /// argument of writeResolverCode will be passed as the second argument to
+  /// the function at ReentryFnAddr.
+  static void writeResolverCode(char *ResolverWorkingMem,
+                                JITTargetAddress ResolverTargetAddress,
+                                JITTargetAddress ReentryFnAddr,
+                                JITTargetAddress ReentryCtxAddr);
+
+  /// Write the requested number of trampolines into the given memory,
+  /// which must be big enough to hold NumTrampolines trampolines followed
+  /// by 1 pointer (the resolver address, which every trampoline loads).
+  static void writeTrampolines(char *TrampolineBlockWorkingMem,
+                               JITTargetAddress TrampolineBlockTargetAddress,
+                               JITTargetAddress ResolverFnAddr,
+                               unsigned NumTrampolines);
+  /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem.
+  /// Stubs will be written as if linked at StubsBlockTargetAddress, with the
+  /// Nth stub using the Nth pointer in memory starting at
+  /// PointersBlockTargetAddress.
+  static void writeIndirectStubsBlock(
+      char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
+      JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
+};
+
 } // end namespace orc
 } // end namespace llvm
 

diff  --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
index 63de9cdfa02ea..eb555b4ddb8b6 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
@@ -261,6 +261,9 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) {
   case Triple::mips64el:
     return CreateWithABI<OrcMips64>(EPC);
 
+  case Triple::riscv64:
+    return CreateWithABI<OrcRiscv64>(EPC);
+
   case Triple::x86_64:
     if (TT.getOS() == Triple::OSType::Win32)
       return CreateWithABI<OrcX86_64_Win32>(EPC);

diff  --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 02e121cd9a4a0..a3942b1066858 100644
--- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -152,6 +152,11 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
       return CCMgrT::Create(ES, ErrorHandlerAddress);
     }
 
+    case Triple::riscv64: {
+      typedef orc::LocalJITCompileCallbackManager<orc::OrcRiscv64> CCMgrT;
+      return CCMgrT::Create(ES, ErrorHandlerAddress);
+    }
+
     case Triple::x86_64: {
       if (T.getOS() == Triple::OSType::Win32) {
         typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_Win32> CCMgrT;
@@ -206,6 +211,12 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) {
                       orc::LocalIndirectStubsManager<orc::OrcMips64>>();
       };
 
+    case Triple::riscv64:
+      return []() {
+        return std::make_unique<
+            orc::LocalIndirectStubsManager<orc::OrcRiscv64>>();
+      };
+
     case Triple::x86_64:
       if (T.getOS() == Triple::OSType::Win32) {
         return [](){

diff  --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
index 66453e6a632fc..20b655bdf4b10 100644
--- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -131,6 +131,10 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
   case Triple::mips64el:
     return LocalLazyCallThroughManager::Create<OrcMips64>(ES, ErrorHandlerAddr);
 
+  case Triple::riscv64:
+    return LocalLazyCallThroughManager::Create<OrcRiscv64>(ES,
+                                                           ErrorHandlerAddr);
+
   case Triple::x86_64:
     if (T.getOS() == Triple::OSType::Win32)
       return LocalLazyCallThroughManager::Create<OrcX86_64_Win32>(

diff  --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
index 18b3c5e12b1c2..ef764a3f0d7fe 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
@@ -906,5 +906,176 @@ void OrcMips64::writeIndirectStubsBlock(
     Stub[8 * I + 7] = 0x00000000;                            // nop
   }
 }
+
+void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem,
+                                   JITTargetAddress ResolverTargetAddress,
+                                   JITTargetAddress ReentryFnAddr,
+                                   JITTargetAddress ReentryCtxAddr) {
+  // Spill s0-s11/ra/a0-a7/fs0-fs11, call reentry fn, restore, jump to result.
+  const uint32_t ResolverCode[] = {
+      0xef810113, // 0x00: addi sp,sp,-264
+      0x00813023, // 0x04: sd s0,0(sp)
+      0x00913423, // 0x08: sd s1,8(sp)
+      0x01213823, // 0x0c: sd s2,16(sp)
+      0x01313c23, // 0x10: sd s3,24(sp)
+      0x03413023, // 0x14: sd s4,32(sp)
+      0x03513423, // 0x18: sd s5,40(sp)
+      0x03613823, // 0x1c: sd s6,48(sp)
+      0x03713c23, // 0x20: sd s7,56(sp)
+      0x05813023, // 0x24: sd s8,64(sp)
+      0x05913423, // 0x28: sd s9,72(sp)
+      0x05a13823, // 0x2c: sd s10,80(sp)
+      0x05b13c23, // 0x30: sd s11,88(sp)
+      0x06113023, // 0x34: sd ra,96(sp)
+      0x06a13423, // 0x38: sd a0,104(sp)
+      0x06b13823, // 0x3c: sd a1,112(sp)
+      0x06c13c23, // 0x40: sd a2,120(sp)
+      0x08d13023, // 0x44: sd a3,128(sp)
+      0x08e13423, // 0x48: sd a4,136(sp)
+      0x08f13823, // 0x4c: sd a5,144(sp)
+      0x09013c23, // 0x50: sd a6,152(sp)
+      0x0b113023, // 0x54: sd a7,160(sp)
+      0x0a813427, // 0x58: fsd fs0,168(sp)
+      0x0a913827, // 0x5c: fsd fs1,176(sp)
+      0x0b213c27, // 0x60: fsd fs2,184(sp)
+      0x0d313027, // 0x64: fsd fs3,192(sp)
+      0x0d413427, // 0x68: fsd fs4,200(sp)
+      0x0d513827, // 0x6c: fsd fs5,208(sp)
+      0x0d613c27, // 0x70: fsd fs6,216(sp)
+      0x0f713027, // 0x74: fsd fs7,224(sp)
+      0x0f813427, // 0x78: fsd fs8,232(sp)
+      0x0f913827, // 0x7c: fsd fs9,240(sp)
+      0x0fa13c27, // 0x80: fsd fs10,248(sp)
+      0x11b13027, // 0x84: fsd fs11,256(sp)
+      0x00000517, // 0x88: auipc a0,0x0
+      0x0b053503, // 0x8c: ld a0,176(a0) # 0x138
+      0x00030593, // 0x90: mv a1,t1 (trampoline's link register)
+      0xff458593, // 0x94: addi a1,a1,-12 (back to trampoline start)
+      0x00000617, // 0x98: auipc a2,0x0
+      0x0a863603, // 0x9c: ld a2,168(a2) # 0x140
+      0x000600e7, // 0xa0: jalr a2 (call the reentry function)
+      0x00050293, // 0xa4: mv t0,a0 (keep returned address in t0)
+      0x00013403, // 0xa8: ld s0,0(sp)
+      0x00813483, // 0xac: ld s1,8(sp)
+      0x01013903, // 0xb0: ld s2,16(sp)
+      0x01813983, // 0xb4: ld s3,24(sp)
+      0x02013a03, // 0xb8: ld s4,32(sp)
+      0x02813a83, // 0xbc: ld s5,40(sp)
+      0x03013b03, // 0xc0: ld s6,48(sp)
+      0x03813b83, // 0xc4: ld s7,56(sp)
+      0x04013c03, // 0xc8: ld s8,64(sp)
+      0x04813c83, // 0xcc: ld s9,72(sp)
+      0x05013d03, // 0xd0: ld s10,80(sp)
+      0x05813d83, // 0xd4: ld s11,88(sp)
+      0x06013083, // 0xd8: ld ra,96(sp)
+      0x06813503, // 0xdc: ld a0,104(sp)
+      0x07013583, // 0xe0: ld a1,112(sp)
+      0x07813603, // 0xe4: ld a2,120(sp)
+      0x08013683, // 0xe8: ld a3,128(sp)
+      0x08813703, // 0xec: ld a4,136(sp)
+      0x09013783, // 0xf0: ld a5,144(sp)
+      0x09813803, // 0xf4: ld a6,152(sp)
+      0x0a013883, // 0xf8: ld a7,160(sp)
+      0x0a813407, // 0xfc: fld fs0,168(sp)
+      0x0b013487, // 0x100: fld fs1,176(sp)
+      0x0b813907, // 0x104: fld fs2,184(sp)
+      0x0c013987, // 0x108: fld fs3,192(sp)
+      0x0c813a07, // 0x10c: fld fs4,200(sp)
+      0x0d013a87, // 0x110: fld fs5,208(sp)
+      0x0d813b07, // 0x114: fld fs6,216(sp)
+      0x0e013b87, // 0x118: fld fs7,224(sp)
+      0x0e813c07, // 0x11c: fld fs8,232(sp)
+      0x0f013c87, // 0x120: fld fs9,240(sp)
+      0x0f813d07, // 0x124: fld fs10,248(sp)
+      0x10013d87, // 0x128: fld fs11,256(sp)
+      0x10810113, // 0x12c: addi sp,sp,264
+      0x00028067, // 0x130: jr t0
+      0x12345678, // 0x134: padding so the pointers below are 8-byte aligned
+      0x12345678, // 0x138: Lreentry_ctx_ptr:
+      0xdeadbeef, // 0x13c:      .quad <placeholder, patched below>
+      0x98765432, // 0x140: Lreentry_fn_ptr:
+      0xcafef00d  // 0x144:      .quad <placeholder, patched below>
+  };
+  // Byte offsets of the two 64-bit literal slots at the end of ResolverCode.
+  const unsigned ReentryCtxAddrOffset = 0x138;
+  const unsigned ReentryFnAddrOffset = 0x140;
+  // Copy the template, then overwrite the placeholder slots with real values.
+  memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
+  memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
+         sizeof(uint64_t));
+  memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
+         sizeof(uint64_t));
+}
+
+void OrcRiscv64::writeTrampolines(char *TrampolineBlockWorkingMem,
+                                  JITTargetAddress TrampolineBlockTargetAddress,
+                                  JITTargetAddress ResolverAddr,
+                                  unsigned NumTrampolines) {
+  // Layout: NumTrampolines 16-byte trampolines, then the resolver pointer.
+  unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
+  // Store the resolver address in the slot that follows the trampolines.
+  memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
+         sizeof(uint64_t));
+  // OffsetToPtr is kept relative to trampoline I, so it shrinks every pass.
+  uint32_t *Trampolines =
+      reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
+  for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
+    uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xFFFFF000;
+    uint32_t Lo12 = OffsetToPtr - Hi20;
+    Trampolines[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr)
+    Trampolines[4 * I + 1] =
+        0x0002b283 | ((Lo12 & 0xFFF) << 20);    // ld t0, %lo(Lptr)(t0)
+    Trampolines[4 * I + 2] = 0x00028367;        // jalr t1, t0
+    Trampolines[4 * I + 3] = 0xdeadface;        // padding
+  }
+}
+
+void OrcRiscv64::writeIndirectStubsBlock(
+    char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
+    JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) {
+  // Stub format is:
+  //
+  // .section __orc_stubs
+  // stub1:
+  //                 auipc   t0, %hi(ptr1)  ; PC-rel load of ptr1
+  //                 ld      t0, %lo(ptr1)(t0)
+  //                 jr      t0             ; Jump to address held in ptr1
+  //                 .word   0xfeedbeef     ; Pad to 16 bytes
+  // stub2:
+  //                 auipc   t0, %hi(ptr2)  ; PC-rel load of ptr2
+  //                 ld      t0, %lo(ptr2)(t0)
+  //                 jr      t0             ; Jump to address held in ptr2
+  //                 .word   0xfeedbeef     ; Pad to 16 bytes
+  //
+  // ...
+  //
+  // .section __orc_ptrs
+  // ptr1:
+  //                 .quad 0x0
+  // ptr2:
+  //                 .quad 0x0
+  //
+  // ...
+
+  assert(stubAndPointerRangesOk<OrcRiscv64>(
+             StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
+         "PointersBlock is out of range");
+  // Each stub is written as four 32-bit words (three instructions + padding).
+  uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
+  // +0x800 rounds Hi20 so that Lo12 fits ld's sign-extended 12-bit immediate.
+  for (unsigned I = 0; I < NumStubs; ++I) {
+    uint64_t PtrDisplacement =
+        PointersBlockTargetAddress - StubsBlockTargetAddress;
+    uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xFFFFF000;
+    uint32_t Lo12 = PtrDisplacement - Hi20;
+    Stub[4 * I + 0] = 0x00000297 | Hi20;                   // auipc t0, %hi(Lptr)
+    Stub[4 * I + 1] = 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr)
+    Stub[4 * I + 2] = 0x00028067;                          // jr t0
+    Stub[4 * I + 3] = 0xfeedbeef;                          // padding
+    PointersBlockTargetAddress += PointerSize;
+    StubsBlockTargetAddress += StubSize;
+  }
+}
+
 } // End namespace orc.
 } // End namespace llvm.


        


More information about the llvm-commits mailing list