[llvm] fe1fa43 - [ORC][ORC-RT] Add initial native-TLV support to MachOPlatform.

Lang Hames via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 20 16:13:00 PDT 2021


Author: Lang Hames
Date: 2021-07-21T09:10:10+10:00
New Revision: fe1fa43f16beac1506a2e73a9f7b3c81179744eb

URL: https://github.com/llvm/llvm-project/commit/fe1fa43f16beac1506a2e73a9f7b3c81179744eb
DIFF: https://github.com/llvm/llvm-project/commit/fe1fa43f16beac1506a2e73a9f7b3c81179744eb.diff

LOG: [ORC][ORC-RT] Add initial native-TLV support to MachOPlatform.

Adds code to LLVM (MachOPlatform) and the ORC runtime to support native MachO
thread local variables. Adding new TLVs to a JITDylib at runtime is supported.

On the LLVM side MachOPlatform is updated to:

1. Identify thread local variables in the LinkGraph and lower them to GOT
accesses to data in the __thread_data or __thread_bss sections.

2. Merge and report the address range of the __thread_data and __thread_bss
sections to the runtime.

On the ORC runtime side, a MachOTLVManager class is introduced which records
the address range of thread data/bss sections, and creates thread-local
instances from the initial data on demand. An orc-runtime specific
tlv_get_addr implementation is included which saves all register state then
calls the MachOTLVManager to get the address of the requested variable for the
current thread.

Added: 
    compiler-rt/lib/orc/macho_tlv.x86-64.s
    compiler-rt/test/orc/TestCases/Darwin/x86-64/trivial-tlv.S

Modified: 
    compiler-rt/lib/orc/CMakeLists.txt
    compiler-rt/lib/orc/macho_platform.cpp
    compiler-rt/lib/orc/macho_platform.h
    llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
    llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
    llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/orc/CMakeLists.txt b/compiler-rt/lib/orc/CMakeLists.txt
index 0a83787d86232..541634e13ab7a 100644
--- a/compiler-rt/lib/orc/CMakeLists.txt
+++ b/compiler-rt/lib/orc/CMakeLists.txt
@@ -11,6 +11,7 @@ set(ORC_SOURCES
 # Implementation files for all ORC architectures.
 set(x86_64_SOURCES
 # x86-64 specific assembly files will go here.
+  macho_tlv.x86-64.s
 )
 
 set(ORC_IMPL_HEADERS

diff  --git a/compiler-rt/lib/orc/macho_platform.cpp b/compiler-rt/lib/orc/macho_platform.cpp
index 8a3f8d96300a5..812cce5b51d0e 100644
--- a/compiler-rt/lib/orc/macho_platform.cpp
+++ b/compiler-rt/lib/orc/macho_platform.cpp
@@ -85,6 +85,12 @@ Error runModInits(const std::vector<ExecutorAddressRange> &ModInitsSections,
   return Error::success();
 }
 
+/// Runtime view of one thread-local variable descriptor: three fields,
+/// corresponding to the three .quad entries of a __thread_vars record.
+struct TLVDescriptor {
+  // Accessor function; it is handed a pointer to this descriptor.
+  void *(*Thunk)(TLVDescriptor *) = nullptr;
+  // pthread key under which the calling thread's TLV manager is stored.
+  unsigned long Key = 0;
+  // Address of the variable's initial value; used to look up (or create) the
+  // calling thread's instance of the variable.
+  unsigned long DataAddress = 0;
+};
+
 class MachOPlatformRuntimeState {
 private:
   struct AtExitEntry {
@@ -126,11 +132,17 @@ class MachOPlatformRuntimeState {
   int registerAtExit(void (*F)(void *), void *Arg, void *DSOHandle);
   void runAtExits(void *DSOHandle);
 
+  /// Returns the base address and size of the section containing ThreadData.
+  Expected<std::pair<const char *, size_t>>
+  getThreadDataSectionFor(const char *ThreadData);
+
 private:
   PerJITDylibState *getJITDylibStateByHeaderAddr(void *DSOHandle);
   PerJITDylibState *getJITDylibStateByName(string_view Path);
   PerJITDylibState &getOrCreateJITDylibState(MachOJITDylibInitializers &MOJDIs);
 
+  Error registerThreadDataSection(span<const char> ThreadDataSec);
+
   Expected<ExecutorAddress> lookupSymbolInJITDylib(void *DSOHandle,
                                                    string_view Symbol);
 
@@ -153,6 +165,9 @@ class MachOPlatformRuntimeState {
   std::recursive_mutex JDStatesMutex;
   std::unordered_map<void *, PerJITDylibState> JDStates;
   std::unordered_map<std::string, void *> JDNameToHeader;
+
+  std::mutex ThreadDataSectionsMutex;
+  std::map<const char *, size_t> ThreadDataSections;
 };
 
 MachOPlatformRuntimeState *MachOPlatformRuntimeState::MOPS = nullptr;
@@ -178,6 +193,12 @@ Error MachOPlatformRuntimeState::registerObjectSections(
     walkEHFrameSection(POSR.EHFrameSection.toSpan<const char>(),
                        __register_frame);
 
+  if (POSR.ThreadDataSection.StartAddress) {
+    if (auto Err = registerThreadDataSection(
+            POSR.ThreadDataSection.toSpan<const char>()))
+      return Err;
+  }
+
   return Error::success();
 }
 
@@ -256,6 +277,19 @@ void MachOPlatformRuntimeState::runAtExits(void *DSOHandle) {
   }
 }
 
+/// Finds the registered thread data section that contains ThreadData and
+/// returns its (base address, size) entry, or an error if no registered
+/// section covers that address.
+Expected<std::pair<const char *, size_t>>
+MachOPlatformRuntimeState::getThreadDataSectionFor(const char *ThreadData) {
+  std::lock_guard<std::mutex> Guard(ThreadDataSectionsMutex);
+
+  // The only candidate is the last section starting at or before ThreadData,
+  // i.e. the entry just before the first one that starts after it.
+  auto Next = ThreadDataSections.upper_bound(ThreadData);
+  if (Next != ThreadDataSections.begin()) {
+    auto Candidate = std::prev(Next);
+    const char *SectionEnd = Candidate->first + Candidate->second;
+    if (ThreadData < SectionEnd)
+      return *Candidate;
+  }
+
+  return make_error<StringError>("No thread local data section for key");
+}
+
 MachOPlatformRuntimeState::PerJITDylibState *
 MachOPlatformRuntimeState::getJITDylibStateByHeaderAddr(void *DSOHandle) {
   auto I = JDStates.find(DSOHandle);
@@ -295,6 +329,20 @@ MachOPlatformRuntimeState::getOrCreateJITDylibState(
   return JDS;
 }
 
+/// Records ThreadDataSection (base address and size) so that
+/// getThreadDataSectionFor can later map an address back to its section.
+/// Returns an error if the new range overlaps an already-registered section
+/// on either side.
+Error MachOPlatformRuntimeState::registerThreadDataSection(
+    span<const char> ThreadDataSection) {
+  std::lock_guard<std::mutex> Lock(ThreadDataSectionsMutex);
+  const char *Start = ThreadDataSection.data();
+  const char *End = Start + ThreadDataSection.size();
+
+  // I is the first registered section that starts strictly after Start.
+  auto I = ThreadDataSections.upper_bound(Start);
+
+  // The new range must not run into the section that follows it.
+  if (I != ThreadDataSections.end() && End > I->first)
+    return make_error<StringError>("Overlapping __thread_data sections");
+
+  // The section that precedes it (if any) must end at or before Start.
+  if (I != ThreadDataSections.begin()) {
+    auto J = std::prev(I);
+    if (J->first + J->second > Start)
+      return make_error<StringError>("Overlapping __thread_data sections");
+  }
+
+  ThreadDataSections.insert(
+      I, std::make_pair(Start, ThreadDataSection.size()));
+  return Error::success();
+}
+
 Expected<ExecutorAddress>
 MachOPlatformRuntimeState::lookupSymbolInJITDylib(void *DSOHandle,
                                                   string_view Sym) {
@@ -367,6 +415,45 @@ Error MachOPlatformRuntimeState::initializeJITDylib(
   return Error::success();
 }
 
+/// Owns one set of copies of thread data sections. An instance is stored
+/// under a pthread key by __orc_rt_macho_tlv_get_addr_impl and deleted by
+/// destroyMachOTLVMgr.
+class MachOPlatformRuntimeTLVManager {
+public:
+  /// Returns the address, within this manager's copy, of the variable whose
+  /// initial value is at ThreadData, or nullptr if no registered thread data
+  /// section contains ThreadData.
+  void *getInstance(const char *ThreadData);
+
+private:
+  // Cache: initial-value address -> address inside this manager's copy.
+  std::unordered_map<const char *, char *> Instances;
+  // Section base address -> this manager's heap copy of the whole section.
+  std::unordered_map<const char *, std::unique_ptr<char[]>> AllocatedSections;
+};
+
+/// Returns the address of this manager's copy of the thread-local variable
+/// whose initial value is at ThreadData. The first request that touches a
+/// section copies the whole section into a fresh heap buffer; later requests
+/// are served from the Instances cache. Logs an error and returns nullptr if
+/// ThreadData is not inside any registered thread data section.
+void *MachOPlatformRuntimeTLVManager::getInstance(const char *ThreadData) {
+  // Fast path: this variable has already been resolved by this manager.
+  auto I = Instances.find(ThreadData);
+  if (I != Instances.end())
+    return I->second;
+
+  auto TDS =
+      MachOPlatformRuntimeState::get().getThreadDataSectionFor(ThreadData);
+  if (!TDS) {
+    __orc_rt_log_error(toString(TDS.takeError()).c_str());
+    return nullptr;
+  }
+
+  // getThreadDataSectionFor only succeeds for addresses strictly inside the
+  // section, so the offset must be strictly less than the section size.
+  size_t ThreadDataDelta = ThreadData - TDS->first;
+  assert(ThreadDataDelta < TDS->second && "ThreadData outside section bounds");
+
+  // Lazily create this manager's private copy of the section, initialized
+  // from the section's contents.
+  auto &Allocated = AllocatedSections[TDS->first];
+  if (!Allocated) {
+    Allocated = std::make_unique<char[]>(TDS->second);
+    memcpy(Allocated.get(), TDS->first, TDS->second);
+  }
+
+  char *Instance = Allocated.get() + ThreadDataDelta;
+  Instances[ThreadData] = Instance;
+  return Instance;
+}
+
+/// Deletes the MachOPlatformRuntimeTLVManager pointed to by MachOTLVMgr.
+/// Passed to pthread_key_create as the key's destructor function.
+void destroyMachOTLVMgr(void *MachOTLVMgr) {
+  delete static_cast<MachOPlatformRuntimeTLVManager *>(MachOTLVMgr);
+}
+
 } // end anonymous namespace
 
 //------------------------------------------------------------------------------
@@ -409,6 +496,40 @@ __orc_rt_macho_deregister_object_sections(char *ArgData, size_t ArgSize) {
       .release();
 }
 
+//------------------------------------------------------------------------------
+//                            TLV support
+//------------------------------------------------------------------------------
+
+/// Returns the calling thread's address for the thread-local variable
+/// described by D, creating the thread's TLV manager on first use. Returns
+/// nullptr (after logging) if the manager cannot be installed under D->Key or
+/// if D->DataAddress is not inside a registered thread data section.
+ORC_RT_INTERFACE void *__orc_rt_macho_tlv_get_addr_impl(TLVDescriptor *D) {
+  auto *TLVMgr = static_cast<MachOPlatformRuntimeTLVManager *>(
+      pthread_getspecific(D->Key));
+  if (!TLVMgr) {
+    // Hold the new manager in a unique_ptr until pthread_setspecific has
+    // accepted it, so that it is freed rather than leaked on failure.
+    auto NewMgr = std::make_unique<MachOPlatformRuntimeTLVManager>();
+    if (pthread_setspecific(D->Key, NewMgr.get())) {
+      __orc_rt_log_error("Call to pthread_setspecific failed");
+      return nullptr;
+    }
+    // Ownership now belongs to the pthread key (see destroyMachOTLVMgr).
+    TLVMgr = NewMgr.release();
+  }
+
+  return TLVMgr->getInstance(
+      reinterpret_cast<char *>(static_cast<uintptr_t>(D->DataAddress)));
+}
+
+/// Wrapper function that creates a pthread key whose destructor is
+/// destroyMachOTLVMgr and returns the key as a uint64_t. If
+/// pthread_key_create fails, an error is logged and ~uint64_t(0) is returned
+/// instead of a key.
+ORC_RT_INTERFACE __orc_rt_CWrapperFunctionResult
+__orc_rt_macho_create_pthread_key(char *ArgData, size_t ArgSize) {
+  return WrapperFunction<uint64_t(void)>::handle(
+             ArgData, ArgSize,
+             []() {
+               pthread_key_t Key;
+               if (pthread_key_create(&Key, destroyMachOTLVMgr)) {
+                 __orc_rt_log_error("Call to pthread_key_create failed");
+                 return ~uint64_t(0);
+               }
+               return static_cast<uint64_t>(Key);
+             })
+      .release();
+}
+
 //------------------------------------------------------------------------------
 //                           cxa_atexit support
 //------------------------------------------------------------------------------

diff  --git a/compiler-rt/lib/orc/macho_platform.h b/compiler-rt/lib/orc/macho_platform.h
index b4abb50e87341..e097c1515ed17 100644
--- a/compiler-rt/lib/orc/macho_platform.h
+++ b/compiler-rt/lib/orc/macho_platform.h
@@ -33,6 +33,7 @@ namespace macho {
 
+/// Address ranges of the per-object sections that are passed to the runtime
+/// for registration.
 struct MachOPerObjectSectionsToRegister {
   ExecutorAddressRange EHFrameSection;
+  ExecutorAddressRange ThreadDataSection;
 };
 
 struct MachOJITDylibInitializers {
@@ -66,7 +67,8 @@ enum dlopen_mode : int {
 
 } // end namespace macho
 
-using SPSMachOPerObjectSectionsToRegister = SPSTuple<SPSExecutorAddressRange>;
+using SPSMachOPerObjectSectionsToRegister =
+    SPSTuple<SPSExecutorAddressRange, SPSExecutorAddressRange>;
 
 template <>
 class SPSSerializationTraits<SPSMachOPerObjectSectionsToRegister,
@@ -75,19 +77,19 @@ class SPSSerializationTraits<SPSMachOPerObjectSectionsToRegister,
 public:
   static size_t size(const macho::MachOPerObjectSectionsToRegister &MOPOSR) {
     return SPSMachOPerObjectSectionsToRegister::AsArgList::size(
-        MOPOSR.EHFrameSection);
+        MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
   }
 
   static bool serialize(SPSOutputBuffer &OB,
                         const macho::MachOPerObjectSectionsToRegister &MOPOSR) {
     return SPSMachOPerObjectSectionsToRegister::AsArgList::serialize(
-        OB, MOPOSR.EHFrameSection);
+        OB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
   }
 
   static bool deserialize(SPSInputBuffer &IB,
                           macho::MachOPerObjectSectionsToRegister &MOPOSR) {
     return SPSMachOPerObjectSectionsToRegister::AsArgList::deserialize(
-        IB, MOPOSR.EHFrameSection);
+        IB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
   }
 };
 

diff  --git a/compiler-rt/lib/orc/macho_tlv.x86-64.s b/compiler-rt/lib/orc/macho_tlv.x86-64.s
new file mode 100644
index 0000000000000..c77064e0504ba
--- /dev/null
+++ b/compiler-rt/lib/orc/macho_tlv.x86-64.s
@@ -0,0 +1,70 @@
+//===-- orc_rt_macho_tlv.x86-64.s -------------------------------*- ASM -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of the ORC runtime support library.
+//
+//===----------------------------------------------------------------------===//
+
+#define REGISTER_SAVE_SPACE_SIZE        512
+
+        .text
+
+	// Returns the address of the TLV in %rax. The general-purpose registers
+	// and %xmm0-%xmm7 saved below are restored before returning.
+	.globl ___orc_rt_macho_tlv_get_addr
+___orc_rt_macho_tlv_get_addr:
+        // Set up a frame and reserve the register save area below %rbp.
+        pushq           %rbp
+        movq            %rsp,        %rbp
+        subq            $REGISTER_SAVE_SPACE_SIZE, %rsp
+        // Save general-purpose registers (all except %rax, %rbp and %rsp).
+        movq            %rbx,     -8(%rbp)
+        movq            %rcx,    -16(%rbp)
+        movq            %rdx,    -24(%rbp)
+        movq            %rsi,    -32(%rbp)
+        movq            %rdi,    -40(%rbp)
+        movq            %r8,     -48(%rbp)
+        movq            %r9,     -56(%rbp)
+        movq            %r10,    -64(%rbp)
+        movq            %r11,    -72(%rbp)
+        movq            %r12,    -80(%rbp)
+        movq            %r13,    -88(%rbp)
+        movq            %r14,    -96(%rbp)
+        movq            %r15,   -104(%rbp)
+	// Save %xmm0-%xmm7. NOTE(review): movdqa needs 16-byte-aligned
+	// addresses, so this relies on %rbp being 16-byte aligned here.
+	movdqa          %xmm0,  -128(%rbp)
+	movdqa          %xmm1,  -144(%rbp)
+	movdqa          %xmm2,  -160(%rbp)
+	movdqa          %xmm3,  -176(%rbp)
+	movdqa          %xmm4,  -192(%rbp)
+	movdqa          %xmm5,  -208(%rbp)
+	movdqa          %xmm6,  -224(%rbp)
+	movdqa          %xmm7,  -240(%rbp)
+        // %rdi has not been modified, so the implementation function sees the
+        // caller's %rdi (presumably the TLV descriptor pointer).
+        call            ___orc_rt_macho_tlv_get_addr_impl
+        // Restore everything saved above; %rax is left holding the result.
+        movq            -8(%rbp),       %rbx
+        movq            -16(%rbp),      %rcx
+        movq            -24(%rbp),      %rdx
+        movq            -32(%rbp),      %rsi
+        movq            -40(%rbp),      %rdi
+        movq            -48(%rbp),      %r8
+        movq            -56(%rbp),      %r9
+        movq            -64(%rbp),      %r10
+        movq            -72(%rbp),      %r11
+        movq            -80(%rbp),      %r12
+        movq            -88(%rbp),      %r13
+        movq            -96(%rbp),      %r14
+        movq            -104(%rbp),     %r15
+        movdqa          -128(%rbp),     %xmm0
+	movdqa          -144(%rbp),     %xmm1
+	movdqa          -160(%rbp),     %xmm2
+	movdqa          -176(%rbp),     %xmm3
+	movdqa          -192(%rbp),     %xmm4
+	movdqa          -208(%rbp),     %xmm5
+	movdqa          -224(%rbp),     %xmm6
+	movdqa          -240(%rbp),     %xmm7
+        // Tear down the frame.
+        addq            $REGISTER_SAVE_SPACE_SIZE, %rsp
+        popq            %rbp
+        ret
+
+.subsections_via_symbols

diff  --git a/compiler-rt/test/orc/TestCases/Darwin/x86-64/trivial-tlv.S b/compiler-rt/test/orc/TestCases/Darwin/x86-64/trivial-tlv.S
new file mode 100644
index 0000000000000..9afff510db6a4
--- /dev/null
+++ b/compiler-rt/test/orc/TestCases/Darwin/x86-64/trivial-tlv.S
@@ -0,0 +1,63 @@
+// RUN: %clang -c -o %t %s
+// RUN: %llvm_jitlink %t
+//
+// Test that basic MachO TLVs work by adding together TLVs with values
+// 0, 1, and -1, and returning the result (0 for success). This setup
+// tests both zero-initialized (__thread_bss) and non-zero-initialized
+// (__thread_data) sections.
+
+	.section	__TEXT,__text,regular,pure_instructions
+	.build_version macos, 11, 0
+
+        .globl  _main
+        .p2align        4, 0x90
+_main:
+	// Returns x + y + z, where x, y and z are thread-local variables.
+	// Each access loads the variable's descriptor address into %rdi and
+	// calls through the descriptor's first field, which returns the
+	// variable's address in %rax. %rcx and %edx are kept live across the
+	// later calls.
+	pushq	%rax
+	movq	_x@TLVP(%rip), %rdi
+	callq	*(%rdi)
+	movq	%rax, %rcx
+	movq	_y@TLVP(%rip), %rdi
+	callq	*(%rdi)
+	movl	(%rax), %edx
+	addl	(%rcx), %edx
+	movq	_z@TLVP(%rip), %rdi
+	callq	*(%rdi)
+	addl	(%rax), %edx
+	movl	%edx, %eax
+	popq	%rcx
+	retq
+
+// Zero-initialized thread-local storage for x.
+.tbss _x$tlv$init, 4, 2
+
+// Descriptor for x: three quads (accessor, key slot left as 0 here, address
+// of the initial value).
+	.section	__DATA,__thread_vars,thread_local_variables
+	.globl	_x
+_x:
+	.quad	__tlv_bootstrap
+	.quad	0
+	.quad	_x$tlv$init
+
+// Initial value for y: 4294967295 is -1 as a 32-bit value.
+	.section	__DATA,__thread_data,thread_local_regular
+	.p2align	2
+_y$tlv$init:
+	.long	4294967295
+
+// Descriptor for y (same three-quad layout as x).
+	.section	__DATA,__thread_vars,thread_local_variables
+	.globl	_y
+_y:
+	.quad	__tlv_bootstrap
+	.quad	0
+	.quad	_y$tlv$init
+
+// Initial value for z: 1.
+	.section	__DATA,__thread_data,thread_local_regular
+	.p2align	2
+_z$tlv$init:
+	.long	1
+
+// Descriptor for z (same three-quad layout as x).
+	.section	__DATA,__thread_vars,thread_local_variables
+	.globl	_z
+_z:
+	.quad	__tlv_bootstrap
+	.quad	0
+	.quad	_z$tlv$init
+
+.subsections_via_symbols

diff  --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
index 50f26b399a69c..189ff80a199fb 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
@@ -29,6 +29,7 @@ namespace orc {
 
+/// Address ranges of the per-object sections that MachOPlatform reports to
+/// the runtime via registerPerObjectSections.
 struct MachOPerObjectSectionsToRegister {
   ExecutorAddressRange EHFrameSection;
+  ExecutorAddressRange ThreadDataSection;
 };
 
 struct MachOJITDylibInitializers {
@@ -158,14 +159,16 @@ class MachOPlatform : public Platform {
     void addMachOHeaderSupportPasses(MaterializationResponsibility &MR,
                                      jitlink::PassConfiguration &Config);
 
-    void addEHSupportPasses(MaterializationResponsibility &MR,
-                            jitlink::PassConfiguration &Config);
+    void addEHAndTLVSupportPasses(MaterializationResponsibility &MR,
+                                  jitlink::PassConfiguration &Config);
 
     Error preserveInitSections(jitlink::LinkGraph &G,
                                MaterializationResponsibility &MR);
 
     Error registerInitSections(jitlink::LinkGraph &G, JITDylib &JD);
 
+    Error fixTLVSectionsAndEdges(jitlink::LinkGraph &G, JITDylib &JD);
+
     std::mutex PluginMutex;
     MachOPlatform &MP;
     InitSymbolDepMap InitSymbolDeps;
@@ -213,6 +216,8 @@ class MachOPlatform : public Platform {
 
   Error registerPerObjectSections(const MachOPerObjectSectionsToRegister &POSR);
 
+  Expected<uint64_t> createPThreadKey();
+
   ExecutionSession &ES;
   ObjectLinkingLayer &ObjLinkingLayer;
   ExecutorProcessControl &EPC;
@@ -223,6 +228,7 @@ class MachOPlatform : public Platform {
   ExecutorAddress orc_rt_macho_platform_bootstrap;
   ExecutorAddress orc_rt_macho_platform_shutdown;
   ExecutorAddress orc_rt_macho_register_object_sections;
+  ExecutorAddress orc_rt_macho_create_pthread_key;
 
   DenseMap<JITDylib *, SymbolLookupSet> RegisteredInitSymbols;
 
@@ -233,11 +239,13 @@ class MachOPlatform : public Platform {
   std::vector<MachOPerObjectSectionsToRegister> BootstrapPOSRs;
 
   DenseMap<JITTargetAddress, JITDylib *> HeaderAddrToJITDylib;
+  DenseMap<JITDylib *, uint64_t> JITDylibToPThreadKey;
 };
 
 namespace shared {
 
-using SPSMachOPerObjectSectionsToRegister = SPSTuple<SPSExecutorAddressRange>;
+using SPSMachOPerObjectSectionsToRegister =
+    SPSTuple<SPSExecutorAddressRange, SPSExecutorAddressRange>;
 
 template <>
 class SPSSerializationTraits<SPSMachOPerObjectSectionsToRegister,
@@ -246,19 +254,19 @@ class SPSSerializationTraits<SPSMachOPerObjectSectionsToRegister,
 public:
   static size_t size(const MachOPerObjectSectionsToRegister &MOPOSR) {
     return SPSMachOPerObjectSectionsToRegister::AsArgList::size(
-        MOPOSR.EHFrameSection);
+        MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
   }
 
   static bool serialize(SPSOutputBuffer &OB,
                         const MachOPerObjectSectionsToRegister &MOPOSR) {
     return SPSMachOPerObjectSectionsToRegister::AsArgList::serialize(
-        OB, MOPOSR.EHFrameSection);
+        OB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
   }
 
   static bool deserialize(SPSInputBuffer &IB,
                           MachOPerObjectSectionsToRegister &MOPOSR) {
     return SPSMachOPerObjectSectionsToRegister::AsArgList::deserialize(
-        IB, MOPOSR.EHFrameSection);
+        IB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
   }
 };
 

diff  --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
index d45a8b70e6402..61d5c5e21ff1d 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -313,6 +313,14 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder {
           Addend = *(const little32_t *)FixupContent - 4;
           Kind = x86_64::RequestGOTAndTransformToDelta32;
           break;
+        case MachOPCRel32TLV:
+          if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
+            TargetSymbol = TargetSymbolOrErr->GraphSymbol;
+          else
+            return TargetSymbolOrErr.takeError();
+          Addend = *(const little32_t *)FixupContent;
+          Kind = x86_64::RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable;
+          break;
         case MachOPointer32:
           if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
             TargetSymbol = TargetSymbolOrErr->GraphSymbol;
@@ -392,9 +400,6 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder {
           assert(TargetSymbol && "No target symbol from parsePairRelocation?");
           break;
         }
-        case MachOPCRel32TLV:
-          return make_error<JITLinkError>(
-              "MachO TLV relocations not yet supported");
         }
 
         LLVM_DEBUG({

diff  --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
index 2a6583249cad4..1cb61a45dcc96 100644
--- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
@@ -128,6 +128,9 @@ constexpr MachOHeaderMaterializationUnit::HeaderSymbol
 
 StringRef EHFrameSectionName = "__TEXT,__eh_frame";
 StringRef ModInitFuncSectionName = "__DATA,__mod_init_func";
+StringRef ThreadBSSSectionName = "__DATA,__thread_bss";
+StringRef ThreadDataSectionName = "__DATA,__thread_data";
+StringRef ThreadVarsSectionName = "__DATA,__thread_vars";
 
 StringRef InitSectionNames[] = {ModInitFuncSectionName};
 
@@ -467,7 +470,8 @@ Error MachOPlatform::bootstrapMachORuntime(JITDylib &PlatformJD) {
       {"___orc_rt_macho_platform_bootstrap", &orc_rt_macho_platform_bootstrap},
       {"___orc_rt_macho_platform_shutdown", &orc_rt_macho_platform_shutdown},
       {"___orc_rt_macho_register_object_sections",
-       &orc_rt_macho_register_object_sections}};
+       &orc_rt_macho_register_object_sections},
+      {"___orc_rt_macho_create_pthread_key", &orc_rt_macho_create_pthread_key}};
 
   SymbolLookupSet RuntimeSymbols;
   std::vector<std::pair<SymbolStringPtr, ExecutorAddress *>> AddrsToRecord;
@@ -562,6 +566,20 @@ Error MachOPlatform::registerPerObjectSections(
   return ErrResult;
 }
 
+/// Asks the ORC runtime in the executor to create a new pthread key and
+/// returns it. Fails if the runtime's key-creation function has not been
+/// resolved yet, if the wrapper call itself fails, or if the runtime reports
+/// that it could not create a key.
+Expected<uint64_t> MachOPlatform::createPThreadKey() {
+  if (!orc_rt_macho_create_pthread_key)
+    return make_error<StringError>(
+        "Attempting to create pthread key in target, but runtime support has "
+        "not been loaded yet",
+        inconvertibleErrorCode());
+
+  uint64_t Result = 0;
+  if (auto Err = EPC.runSPSWrapper<uint64_t(void)>(
+          orc_rt_macho_create_pthread_key.getValue(), Result))
+    return std::move(Err);
+
+  // The runtime wrapper signals a pthread_key_create failure by returning
+  // all-ones rather than a key; do not hand that out as a usable key.
+  if (Result == ~uint64_t(0))
+    return make_error<StringError>(
+        "Runtime failed to create pthread key for thread local variables",
+        inconvertibleErrorCode());
+
+  return Result;
+}
+
 void MachOPlatform::MachOPlatformPlugin::modifyPassConfig(
     MaterializationResponsibility &MR, jitlink::LinkGraph &LG,
     jitlink::PassConfiguration &Config) {
@@ -579,8 +597,8 @@ void MachOPlatform::MachOPlatformPlugin::modifyPassConfig(
   if (MR.getInitializerSymbol())
     addInitializerSupportPasses(MR, Config);
 
-  // Add passes for eh-frame support.
-  addEHSupportPasses(MR, Config);
+  // Add passes for eh-frame and TLV support.
+  addEHAndTLVSupportPasses(MR, Config);
 }
 
 ObjectLinkingLayer::Plugin::SyntheticSymbolDependenciesMap
@@ -634,10 +652,18 @@ void MachOPlatform::MachOPlatformPlugin::addMachOHeaderSupportPasses(
   });
 }
 
-void MachOPlatform::MachOPlatformPlugin::addEHSupportPasses(
+void MachOPlatform::MachOPlatformPlugin::addEHAndTLVSupportPasses(
     MaterializationResponsibility &MR, jitlink::PassConfiguration &Config) {
 
-  // Add a pass to register the final addresses of the eh-frame sections
+  // Insert TLV lowering at the start of the PostPrunePasses, since we want
+  // it to run before GOT/PLT lowering.
+  Config.PostPrunePasses.insert(
+      Config.PostPrunePasses.begin(),
+      [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
+        return fixTLVSectionsAndEdges(G, JD);
+      });
+
+  // Add a pass to register the final addresses of the eh-frame and TLV sections
   // with the runtime.
   Config.PostFixupPasses.push_back([this](jitlink::LinkGraph &G) -> Error {
     MachOPerObjectSectionsToRegister POSR;
@@ -649,7 +675,33 @@ void MachOPlatform::MachOPlatformPlugin::addEHSupportPasses(
                                ExecutorAddress(R.getEnd())};
     }
 
-    if (POSR.EHFrameSection.StartAddress) {
+    // Get a pointer to the thread data section if there is one. It will be used
+    // below.
+    jitlink::Section *ThreadDataSection =
+        G.findSectionByName(ThreadDataSectionName);
+
+    // Handle thread BSS section if there is one.
+    if (auto *ThreadBSSSection = G.findSectionByName(ThreadBSSSectionName)) {
+      // If there's already a thread data section in this graph then merge the
+      // thread BSS section content into it, otherwise just treat the thread
+      // BSS section as the thread data section.
+      if (ThreadDataSection)
+        G.mergeSections(*ThreadDataSection, *ThreadBSSSection);
+      else
+        ThreadDataSection = ThreadBSSSection;
+    }
+
+    // Having merged thread BSS (if present) and thread data (if present),
+    // record the resulting section range.
+    if (ThreadDataSection) {
+      jitlink::SectionRange R(*ThreadDataSection);
+      if (!R.empty())
+        POSR.ThreadDataSection = {ExecutorAddress(R.getStart()),
+                                  ExecutorAddress(R.getEnd())};
+    }
+
+    if (POSR.EHFrameSection.StartAddress ||
+        POSR.ThreadDataSection.StartAddress) {
 
       // If we're still bootstrapping the runtime then just record this
       // frame for now.
@@ -727,5 +779,61 @@ Error MachOPlatform::MachOPlatformPlugin::registerInitSections(
   return MP.registerInitInfo(JD, InitSections);
 }
 
+/// Lowers MachO thread-local variable constructs in G for JD:
+///   1. Redirects the external symbol __tlv_bootstrap to
+///      ___orc_rt_macho_tlv_get_addr.
+///   2. Writes JD's pthread key into the second pointer-sized field of every
+///      __thread_vars block, creating and recording the key on first use.
+///   3. Rewrites TLVP edges into GOT edges.
+/// Returns an error if a key cannot be created or if a __thread_vars block is
+/// not exactly three pointers in size.
+Error MachOPlatform::MachOPlatformPlugin::fixTLVSectionsAndEdges(
+    jitlink::LinkGraph &G, JITDylib &JD) {
+
+  // Rename external references to __tlv_bootstrap to
+  // ___orc_rt_macho_tlv_get_addr.
+  for (auto *Sym : G.external_symbols())
+    if (Sym->getName() == "__tlv_bootstrap") {
+      Sym->setName("___orc_rt_macho_tlv_get_addr");
+      break;
+    }
+
+  // Store key in __thread_vars struct fields.
+  if (auto *ThreadVarsSec = G.findSectionByName(ThreadVarsSectionName)) {
+    Optional<uint64_t> Key;
+    {
+      std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
+      auto I = MP.JITDylibToPThreadKey.find(&JD);
+      if (I != MP.JITDylibToPThreadKey.end())
+        Key = I->second;
+    }
+
+    if (!Key) {
+      // Create the key without holding PlatformMutex.
+      auto KeyOrErr = MP.createPThreadKey();
+      if (!KeyOrErr)
+        return KeyOrErr.takeError();
+
+      // Record the key so that later graphs for this JITDylib reuse it. If
+      // another link recorded a key in the meantime, insert leaves that entry
+      // in place and we adopt it, so all objects in JD agree on one key.
+      std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
+      Key = MP.JITDylibToPThreadKey.insert(std::make_pair(&JD, *KeyOrErr))
+                .first->second;
+    }
+
+    uint64_t PlatformKeyBits =
+        support::endian::byte_swap(*Key, G.getEndianness());
+
+    for (auto *B : ThreadVarsSec->blocks()) {
+      if (B->getSize() != 3 * G.getPointerSize())
+        return make_error<StringError>("__thread_vars block at " +
+                                           formatv("{0:x}", B->getAddress()) +
+                                           " has unexpected size",
+                                       inconvertibleErrorCode());
+
+      // Copy the block's content into a fresh graph-allocated buffer, then
+      // overwrite the second pointer-sized field with the key.
+      auto NewBlockContent = G.allocateBuffer(B->getSize());
+      llvm::copy(B->getContent(), NewBlockContent.data());
+      memcpy(NewBlockContent.data() + G.getPointerSize(), &PlatformKeyBits,
+             G.getPointerSize());
+      B->setContent(NewBlockContent);
+    }
+  }
+
+  // Transform any TLV edges into GOT edges.
+  for (auto *B : G.blocks())
+    for (auto &E : B->edges())
+      if (E.getKind() ==
+          jitlink::x86_64::RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable)
+        E.setKind(
+            jitlink::x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable);
+
+  return Error::success();
+}
+
 } // End namespace orc.
 } // End namespace llvm.


        


More information about the llvm-commits mailing list