[llvm] 27a79b7 - [JITLink] Add a MachO x86-64 GOT and Stub bypass optimization.

Lang Hames via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 23 23:39:09 PST 2020


Author: Lang Hames
Date: 2020-02-23T23:38:31-08:00
New Revision: 27a79b721628ce0814cdc7b4d5267717bcc52421

URL: https://github.com/llvm/llvm-project/commit/27a79b721628ce0814cdc7b4d5267717bcc52421
DIFF: https://github.com/llvm/llvm-project/commit/27a79b721628ce0814cdc7b4d5267717bcc52421.diff

LOG: [JITLink] Add a MachO x86-64 GOT and Stub bypass optimization.

This optimization bypasses GOT loads and calls/branches through stubs when the
ultimate target of the access/branch is found to be within range of the
reference.

Extra debugging output is also added to the generic JITLink algorithm and
basic GOT and Stubs builder utility to aid debugging.

Added: 
    llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_GOTAndStubsOptimizationHelper.s
    llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s

Modified: 
    llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
    llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
    llvm/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h
    llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
    llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
    llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
    llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
index d673a89386da..967397f10ad3 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -1176,7 +1176,7 @@ struct PassConfiguration {
   /// Pre-prune passes.
   ///
   /// These passes are called on the graph after it is built, and before any
-  /// symbols have been pruned.
+  /// symbols have been pruned. Graph nodes still have their original vmaddrs.
   ///
   /// Notable use cases: Marking symbols live or should-discard.
   LinkGraphPassList PrePrunePasses;
@@ -1184,15 +1184,26 @@ struct PassConfiguration {
   /// Post-prune passes.
   ///
   /// These passes are called on the graph after dead stripping, but before
-  /// fixups are applied.
+  /// memory is allocated or nodes assigned their final addresses.
   ///
   /// Notable use cases: Building GOT, stub, and TLV symbols.
   LinkGraphPassList PostPrunePasses;
 
+  /// Post-allocation passes.
+  ///
+  /// These passes are called on the graph after memory has been allocated,
+  /// content copied into working memory, and nodes have been assigned their
+  /// final addresses.
+  ///
+  /// Notable use cases: Late link-time optimizations like GOT and stub
+  /// elimination.
+  LinkGraphPassList PostAllocationPasses;
+
   /// Post-fixup passes.
   ///
   /// These passes are called on the graph after block contents has been copied
-  /// to working memory, and fixups applied.
+  /// to working memory, and fixups applied. Graph nodes have been updated to
+  /// their final target vmaddrs.
   ///
   /// Notable use cases: Testing and validation.
   LinkGraphPassList PostFixupPasses;

diff  --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
index 00a7feb86e83..27fcdf4fa990 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h
@@ -22,6 +22,7 @@ namespace MachO_x86_64_Edges {
 
 enum MachOX86RelocationKind : Edge::Kind {
   Branch32 = Edge::FirstRelocation,
+  Branch32ToStub,
   Pointer32,
   Pointer64,
   Pointer64Anon,

diff  --git a/llvm/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h b/llvm/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h
index b47a798c7603..82258a35a675 100644
--- a/llvm/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/BasicGOTAndStubsBuilder.h
@@ -15,6 +15,8 @@
 
 #include "llvm/ExecutionEngine/JITLink/JITLink.h"
 
+#define DEBUG_TYPE "jitlink"
+
 namespace llvm {
 namespace jitlink {
 
@@ -27,12 +29,25 @@ template <typename BuilderImpl> class BasicGOTAndStubsBuilder {
     // the newly added ones, so just copy the existing blocks out.
     std::vector<Block *> Blocks(G.blocks().begin(), G.blocks().end());
 
+    LLVM_DEBUG(dbgs() << "Creating GOT entries and stubs:\n");
+
     for (auto *B : Blocks)
       for (auto &E : B->edges())
-        if (impl().isGOTEdge(E))
+        if (impl().isGOTEdge(E)) {
+          LLVM_DEBUG({
+            dbgs() << "  Updating GOT edge ";
+            printEdge(dbgs(), *B, E, "<target GOT>");
+            dbgs() << "\n";
+          });
           impl().fixGOTEdge(E, getGOTEntrySymbol(E.getTarget()));
-        else if (impl().isExternalBranchEdge(E))
+        } else if (impl().isExternalBranchEdge(E)) {
+          LLVM_DEBUG({
+            dbgs() << "  Updating external branch edge ";
+            printEdge(dbgs(), *B, E, "<target PC-rel>");
+            dbgs() << "\n";
+          });
           impl().fixExternalBranchEdge(E, getStubSymbol(E.getTarget()));
+        }
   }
 
 protected:
@@ -44,11 +59,17 @@ template <typename BuilderImpl> class BasicGOTAndStubsBuilder {
     // Build the entry if it doesn't exist.
     if (GOTEntryI == GOTEntries.end()) {
       auto &GOTEntry = impl().createGOTEntry(Target);
+      LLVM_DEBUG({
+        dbgs() << "    Created GOT entry for " << Target.getName() << ": "
+               << GOTEntry << "\n";
+      });
       GOTEntryI =
           GOTEntries.insert(std::make_pair(Target.getName(), &GOTEntry)).first;
     }
 
     assert(GOTEntryI != GOTEntries.end() && "Could not get GOT entry symbol");
+    LLVM_DEBUG(
+        { dbgs() << "    Using GOT entry " << *GOTEntryI->second << "\n"; });
     return *GOTEntryI->second;
   }
 
@@ -59,10 +80,15 @@ template <typename BuilderImpl> class BasicGOTAndStubsBuilder {
 
     if (StubI == Stubs.end()) {
       auto &StubSymbol = impl().createStub(Target);
+      LLVM_DEBUG({
+        dbgs() << "    Created stub for " << Target.getName() << ": "
+               << StubSymbol << "\n";
+      });
       StubI = Stubs.insert(std::make_pair(Target.getName(), &StubSymbol)).first;
     }
 
     assert(StubI != Stubs.end() && "Count not get stub symbol");
+    LLVM_DEBUG({ dbgs() << "    Using stub " << *StubI->second << "\n"; });
     return *StubI->second;
   }
 

diff  --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
index 7b594fd2c0ea..ec6681c823f6 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
@@ -24,6 +24,8 @@ JITLinkerBase::~JITLinkerBase() {}
 
 void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
 
+  LLVM_DEBUG({ dbgs() << "Building jitlink graph for new input...\n"; });
+
   // Build the link graph.
   if (auto GraphOrErr = buildGraph(Ctx->getObjectBuffer()))
     G = std::move(*GraphOrErr);
@@ -31,6 +33,10 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
     return Ctx->notifyFailed(GraphOrErr.takeError());
   assert(G && "Graph should have been created by buildGraph above");
 
+  LLVM_DEBUG({
+    dbgs() << "Starting link phase 1 for graph " << G->getName() << "\n";
+  });
+
   // Prune and optimize the graph.
   if (auto Err = runPasses(Passes.PrePrunePasses))
     return Ctx->notifyFailed(std::move(Err));
@@ -59,10 +65,17 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
     return Ctx->notifyFailed(std::move(Err));
 
   // Notify client that the defined symbols have been assigned addresses.
+  LLVM_DEBUG(
+      { dbgs() << "Resolving symbols defined in " << G->getName() << "\n"; });
   Ctx->notifyResolved(*G);
 
   auto ExternalSymbols = getExternalSymbolNames();
 
+  LLVM_DEBUG({
+    dbgs() << "Issuing lookup for external symbols for " << G->getName()
+           << " (may trigger materialization/linking of other graphs)...\n";
+  });
+
   // We're about to hand off ownership of ourself to the continuation. Grab a
   // pointer to the context so that we can call it to initiate the lookup.
   //
@@ -87,6 +100,11 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
 void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
                                Expected<AsyncLookupResult> LR,
                                SegmentLayoutMap Layout) {
+
+  LLVM_DEBUG({
+    dbgs() << "Starting link phase 2 for graph " << G->getName() << "\n";
+  });
+
   // If the lookup failed, bail out.
   if (!LR)
     return deallocateAndBailOut(LR.takeError());
@@ -94,13 +112,25 @@ void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
   // Assign addresses to external addressables.
   applyLookupResult(*LR);
 
+  // Copy block content to working memory.
+  copyBlockContentToWorkingMemory(Layout, *Alloc);
+
+  LLVM_DEBUG({
+    dbgs() << "Link graph \"" << G->getName()
+           << "\" before post-allocation passes:\n";
+    dumpGraph(dbgs());
+  });
+
+  if (auto Err = runPasses(Passes.PostAllocationPasses))
+    return deallocateAndBailOut(std::move(Err));
+
   LLVM_DEBUG({
     dbgs() << "Link graph \"" << G->getName() << "\" before copy-and-fixup:\n";
     dumpGraph(dbgs());
   });
 
-  // Copy block content to working memory and fix up.
-  if (auto Err = copyAndFixUpBlocks(Layout, *Alloc))
+  // Fix up block content.
+  if (auto Err = fixUpBlocks(*G))
     return deallocateAndBailOut(std::move(Err));
 
   LLVM_DEBUG({
@@ -122,9 +152,16 @@ void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
 }
 
 void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self, Error Err) {
+
+  LLVM_DEBUG({
+    dbgs() << "Starting link phase 3 for graph " << G->getName() << "\n";
+  });
+
   if (Err)
     return deallocateAndBailOut(std::move(Err));
   Ctx->notifyFinalized(std::move(Alloc));
+
+  LLVM_DEBUG({ dbgs() << "Link of graph " << G->getName() << " complete\n"; });
 }
 
 Error JITLinkerBase::runPasses(LinkGraphPassList &Passes) {
@@ -165,7 +202,7 @@ JITLinkerBase::SegmentLayoutMap JITLinkerBase::layOutBlocks() {
   }
 
   LLVM_DEBUG({
-    dbgs() << "Segment ordering:\n";
+    dbgs() << "Computed segment ordering:\n";
     for (auto &KV : Layout) {
       dbgs() << "  Segment "
              << static_cast<sys::Memory::ProtectionFlags>(KV.first) << ":\n";
@@ -302,6 +339,77 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) {
          "All strong external symbols should have been resolved by now");
 }
 
+void JITLinkerBase::copyBlockContentToWorkingMemory(
+    const SegmentLayoutMap &Layout, JITLinkMemoryManager::Allocation &Alloc) {
+
+  LLVM_DEBUG(dbgs() << "Copying block content:\n");
+  for (auto &KV : Layout) {
+    auto &Prot = KV.first;
+    auto &SegLayout = KV.second;
+
+    auto SegMem =
+        Alloc.getWorkingMemory(static_cast<sys::Memory::ProtectionFlags>(Prot));
+    char *LastBlockEnd = SegMem.data();
+    char *BlockDataPtr = LastBlockEnd;
+
+    LLVM_DEBUG({
+      dbgs() << "  Processing segment "
+             << static_cast<sys::Memory::ProtectionFlags>(Prot) << " [ "
+             << (const void *)SegMem.data() << " .. "
+             << (const void *)((char *)SegMem.data() + SegMem.size())
+             << " ]\n    Processing content sections:\n";
+    });
+
+    for (auto *B : SegLayout.ContentBlocks) {
+      LLVM_DEBUG(dbgs() << "    " << *B << ":\n");
+
+      // Pad to alignment/alignment-offset.
+      BlockDataPtr = alignToBlock(BlockDataPtr, *B);
+
+      LLVM_DEBUG({
+        dbgs() << "      Bumped block pointer to " << (const void *)BlockDataPtr
+               << " to meet block alignment " << B->getAlignment()
+               << " and alignment offset " << B->getAlignmentOffset() << "\n";
+      });
+
+      // Zero pad up to alignment.
+      LLVM_DEBUG({
+        if (LastBlockEnd != BlockDataPtr)
+          dbgs() << "      Zero padding from " << (const void *)LastBlockEnd
+                 << " to " << (const void *)BlockDataPtr << "\n";
+      });
+
+      while (LastBlockEnd != BlockDataPtr)
+        *LastBlockEnd++ = 0;
+
+      // Copy initial block content.
+      LLVM_DEBUG({
+        dbgs() << "      Copying block " << *B << " content, "
+               << B->getContent().size() << " bytes, from "
+               << (const void *)B->getContent().data() << " to "
+               << (const void *)BlockDataPtr << "\n";
+      });
+      memcpy(BlockDataPtr, B->getContent().data(), B->getContent().size());
+
+      // Point the block's content to the copy in working memory.
+      B->setContent(StringRef(BlockDataPtr, B->getContent().size()));
+
+      // Update block end pointer.
+      LastBlockEnd = BlockDataPtr + B->getContent().size();
+      BlockDataPtr = LastBlockEnd;
+    }
+
+    // Zero pad the rest of the segment.
+    LLVM_DEBUG({
+      dbgs() << "    Zero padding end of segment from "
+             << (const void *)LastBlockEnd << " to "
+             << (const void *)((char *)SegMem.data() + SegMem.size()) << "\n";
+    });
+    while (LastBlockEnd != SegMem.data() + SegMem.size())
+      *LastBlockEnd++ = 0;
+  }
+}
+
 void JITLinkerBase::deallocateAndBailOut(Error Err) {
   assert(Err && "Should not be bailing out on success value");
   assert(Alloc && "can not call deallocateAndBailOut before allocation");

diff  --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
index d5687b7afc96..534590946493 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
@@ -100,14 +100,14 @@ class JITLinkerBase {
 
   // Copy block contents and apply relocations.
   // Implemented in JITLinker.
-  virtual Error
-  copyAndFixUpBlocks(const SegmentLayoutMap &Layout,
-                     JITLinkMemoryManager::Allocation &Alloc) const = 0;
+  virtual Error fixUpBlocks(LinkGraph &G) const = 0;
 
   SegmentLayoutMap layOutBlocks();
   Error allocateSegments(const SegmentLayoutMap &Layout);
   JITLinkContext::LookupMap getExternalSymbolNames() const;
   void applyLookupResult(AsyncLookupResult LR);
+  void copyBlockContentToWorkingMemory(const SegmentLayoutMap &Layout,
+                                       JITLinkMemoryManager::Allocation &Alloc);
   void deallocateAndBailOut(Error Err);
 
   void dumpGraph(raw_ostream &OS);
@@ -144,88 +144,25 @@ template <typename LinkerImpl> class JITLinker : public JITLinkerBase {
     return static_cast<const LinkerImpl &>(*this);
   }
 
-  Error
-  copyAndFixUpBlocks(const SegmentLayoutMap &Layout,
-                     JITLinkMemoryManager::Allocation &Alloc) const override {
-    LLVM_DEBUG(dbgs() << "Copying and fixing up blocks:\n");
-    for (auto &KV : Layout) {
-      auto &Prot = KV.first;
-      auto &SegLayout = KV.second;
-
-      auto SegMem = Alloc.getWorkingMemory(
-          static_cast<sys::Memory::ProtectionFlags>(Prot));
-      char *LastBlockEnd = SegMem.data();
-      char *BlockDataPtr = LastBlockEnd;
-
-      LLVM_DEBUG({
-        dbgs() << "  Processing segment "
-               << static_cast<sys::Memory::ProtectionFlags>(Prot) << " [ "
-               << (const void *)SegMem.data() << " .. "
-               << (const void *)((char *)SegMem.data() + SegMem.size())
-               << " ]\n    Processing content sections:\n";
-      });
-
-      for (auto *B : SegLayout.ContentBlocks) {
-        LLVM_DEBUG(dbgs() << "    " << *B << ":\n");
-
-        // Pad to alignment/alignment-offset.
-        BlockDataPtr = alignToBlock(BlockDataPtr, *B);
-
-        LLVM_DEBUG({
-          dbgs() << "      Bumped block pointer to "
-                 << (const void *)BlockDataPtr << " to meet block alignment "
-                 << B->getAlignment() << " and alignment offset "
-                 << B->getAlignmentOffset() << "\n";
-        });
-
-        // Zero pad up to alignment.
-        LLVM_DEBUG({
-          if (LastBlockEnd != BlockDataPtr)
-            dbgs() << "      Zero padding from " << (const void *)LastBlockEnd
-                   << " to " << (const void *)BlockDataPtr << "\n";
-        });
-
-        while (LastBlockEnd != BlockDataPtr)
-          *LastBlockEnd++ = 0;
-
-        // Copy initial block content.
-        LLVM_DEBUG({
-          dbgs() << "      Copying block " << *B << " content, "
-                 << B->getContent().size() << " bytes, from "
-                 << (const void *)B->getContent().data() << " to "
-                 << (const void *)BlockDataPtr << "\n";
-        });
-        memcpy(BlockDataPtr, B->getContent().data(), B->getContent().size());
-
-        // Copy Block data and apply fixups.
-        LLVM_DEBUG(dbgs() << "      Applying fixups.\n");
-        for (auto &E : B->edges()) {
-
-          // Skip non-relocation edges.
-          if (!E.isRelocation())
-            continue;
-
-          // Dispatch to LinkerImpl for fixup.
-          if (auto Err = impl().applyFixup(*B, E, BlockDataPtr))
-            return Err;
-        }
-
-        // Point the block's content to the fixed up buffer.
-        B->setContent(StringRef(BlockDataPtr, B->getContent().size()));
-
-        // Update block end pointer.
-        LastBlockEnd = BlockDataPtr + B->getContent().size();
-        BlockDataPtr = LastBlockEnd;
-      }
+  Error fixUpBlocks(LinkGraph &G) const override {
+    LLVM_DEBUG(dbgs() << "Fixing up blocks:\n");
+
+    for (auto *B : G.blocks()) {
+      LLVM_DEBUG(dbgs() << "  " << *B << ":\n");
+
+      // Apply fixups to the block's working-memory content.
+      LLVM_DEBUG(dbgs() << "    Applying fixups.\n");
+      for (auto &E : B->edges()) {
 
-      // Zero pad the rest of the segment.
-      LLVM_DEBUG({
-        dbgs() << "    Zero padding end of segment from "
-               << (const void *)LastBlockEnd << " to "
-               << (const void *)((char *)SegMem.data() + SegMem.size()) << "\n";
-      });
-      while (LastBlockEnd != SegMem.data() + SegMem.size())
-        *LastBlockEnd++ = 0;
+        // Skip non-relocation edges.
+        if (!E.isRelocation())
+          continue;
+
+        // Dispatch to LinkerImpl for fixup.
+        auto *BlockData = const_cast<char *>(B->getContent().data());
+        if (auto Err = impl().applyFixup(*B, E, BlockData))
+          return Err;
+      }
     }
 
     return Error::success();

diff  --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
index 69ec72aae292..86d10025d33c 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -350,6 +350,9 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder {
 class MachO_x86_64_GOTAndStubsBuilder
     : public BasicGOTAndStubsBuilder<MachO_x86_64_GOTAndStubsBuilder> {
 public:
+  static const uint8_t NullGOTEntryContent[8];
+  static const uint8_t StubContent[6];
+
   MachO_x86_64_GOTAndStubsBuilder(LinkGraph &G)
       : BasicGOTAndStubsBuilder<MachO_x86_64_GOTAndStubsBuilder>(G) {}
 
@@ -367,7 +370,13 @@ class MachO_x86_64_GOTAndStubsBuilder
   void fixGOTEdge(Edge &E, Symbol &GOTEntry) {
     assert((E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad) &&
            "Not a GOT edge?");
-    E.setKind(PCRel32);
+    // If this is a PCRel32GOT then change it to an ordinary PCRel32. If it is
+    // a PCRel32GOTLoad then leave it as-is for now. We will use the kind to
+    // check for GOT optimization opportunities in the
+    // optimizeMachO_x86_64_GOTAndStubs pass below.
+    if (E.getKind() == PCRel32GOT)
+      E.setKind(PCRel32);
+
     E.setTarget(GOTEntry);
     // Leave the edge addend as-is.
   }
@@ -388,6 +397,11 @@ class MachO_x86_64_GOTAndStubsBuilder
   void fixExternalBranchEdge(Edge &E, Symbol &Stub) {
     assert(E.getKind() == Branch32 && "Not a Branch32 edge?");
     assert(E.getAddend() == 0 && "Branch32 edge has non-zero addend?");
+
+    // Set the edge kind to Branch32ToStub. We will use this to check for stub
+    // optimization opportunities in the optimizeMachO_x86_64_GOTAndStubs pass
+    // below.
+    E.setKind(Branch32ToStub);
     E.setTarget(Stub);
   }
 
@@ -417,8 +431,6 @@ class MachO_x86_64_GOTAndStubsBuilder
                      sizeof(StubContent));
   }
 
-  static const uint8_t NullGOTEntryContent[8];
-  static const uint8_t StubContent[6];
   Section *GOTSection = nullptr;
   Section *StubsSection = nullptr;
 };
@@ -429,6 +441,89 @@ const uint8_t MachO_x86_64_GOTAndStubsBuilder::StubContent[6] = {
     0xFF, 0x25, 0x00, 0x00, 0x00, 0x00};
 } // namespace
 
+Error optimizeMachO_x86_64_GOTAndStubs(LinkGraph &G) {
+  LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n");
+
+  for (auto *B : G.blocks())
+    for (auto &E : B->edges())
+      if (E.getKind() == PCRel32GOTLoad) {
+        assert(E.getOffset() >= 3 && "GOT edge occurs too early in block");
+
+        // Switch the edge kind to PCRel32: Whether we change the edge target
+        // or not this will be the desired kind.
+        E.setKind(PCRel32);
+
+        // Optimize GOT references.
+        auto &GOTBlock = E.getTarget().getBlock();
+        assert(GOTBlock.getSize() == G.getPointerSize() &&
+               "GOT entry block should be pointer sized");
+        assert(GOTBlock.edges_size() == 1 &&
+               "GOT entry should only have one outgoing edge");
+
+        auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
+        JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset();
+        JITTargetAddress TargetAddr = GOTTarget.getAddress();
+
+        // Check that this is a recognized MOV instruction.
+        // FIXME: Can we assume this?
+        constexpr uint8_t MOVQRIPRel[] = {0x48, 0x8b};
+        if (strncmp(B->getContent().data() + E.getOffset() - 3,
+                    reinterpret_cast<const char *>(MOVQRIPRel), 2) != 0)
+          continue;
+
+        int64_t Displacement = TargetAddr - EdgeAddr + 4;
+        if (Displacement >= std::numeric_limits<int32_t>::min() &&
+            Displacement <= std::numeric_limits<int32_t>::max()) {
+          E.setTarget(GOTTarget);
+          auto *BlockData = reinterpret_cast<uint8_t *>(
+              const_cast<char *>(B->getContent().data()));
+          BlockData[E.getOffset() - 2] = 0x8d;
+          LLVM_DEBUG({
+            dbgs() << "  Replaced GOT load wih LEA:\n    ";
+            printEdge(dbgs(), *B, E,
+                      getMachOX86RelocationKindName(E.getKind()));
+            dbgs() << "\n";
+          });
+        }
+      } else if (E.getKind() == Branch32ToStub) {
+
+        // Switch the edge kind to Branch32: Whether we change the edge target
+        // or not this will be the desired kind.
+        E.setKind(Branch32);
+
+        auto &StubBlock = E.getTarget().getBlock();
+        assert(StubBlock.getSize() ==
+                   sizeof(MachO_x86_64_GOTAndStubsBuilder::StubContent) &&
+               "Stub block should be stub sized");
+        assert(StubBlock.edges_size() == 1 &&
+               "Stub block should only have one outgoing edge");
+
+        auto &GOTBlock = StubBlock.edges().begin()->getTarget().getBlock();
+        assert(GOTBlock.getSize() == G.getPointerSize() &&
+               "GOT block should be pointer sized");
+        assert(GOTBlock.edges_size() == 1 &&
+               "GOT block should only have one outgoing edge");
+
+        auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
+        JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset();
+        JITTargetAddress TargetAddr = GOTTarget.getAddress();
+
+        int64_t Displacement = TargetAddr - EdgeAddr + 4;
+        if (Displacement >= std::numeric_limits<int32_t>::min() &&
+            Displacement <= std::numeric_limits<int32_t>::max()) {
+          E.setTarget(GOTTarget);
+          LLVM_DEBUG({
+            dbgs() << "  Replaced stub branch with direct branch:\n    ";
+            printEdge(dbgs(), *B, E,
+                      getMachOX86RelocationKindName(E.getKind()));
+            dbgs() << "\n";
+          });
+        }
+      }
+
+  return Error::success();
+}
+
 namespace llvm {
 namespace jitlink {
 
@@ -570,6 +665,9 @@ void jitLink_MachO_x86_64(std::unique_ptr<JITLinkContext> Ctx) {
       MachO_x86_64_GOTAndStubsBuilder(G).run();
       return Error::success();
     });
+
+    // Add GOT/Stubs optimizer pass.
+    Config.PostAllocationPasses.push_back(optimizeMachO_x86_64_GOTAndStubs);
   }
 
   if (auto Err = Ctx->modifyPassConfig(TT, Config))
@@ -583,6 +681,8 @@ StringRef getMachOX86RelocationKindName(Edge::Kind R) {
   switch (R) {
   case Branch32:
     return "Branch32";
+  case Branch32ToStub:
+    return "Branch32ToStub";
   case Pointer32:
     return "Pointer32";
   case Pointer64:

diff  --git a/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_GOTAndStubsOptimizationHelper.s b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_GOTAndStubsOptimizationHelper.s
new file mode 100644
index 000000000000..9cfe9a2c5f49
--- /dev/null
+++ b/llvm/test/ExecutionEngine/JITLink/X86/Inputs/MachO_GOTAndStubsOptimizationHelper.s
@@ -0,0 +1,8 @@
+	.section	__TEXT,__text,regular,pure_instructions
+	.macosx_version_min 10, 14
+	.globl	bypass_got
+	.p2align	4, 0x90
+bypass_got:
+	movq	_x@GOTPCREL(%rip), %rax
+
+.subsections_via_symbols

diff  --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s
new file mode 100644
index 000000000000..98df053c9e59
--- /dev/null
+++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_GOTAndStubsOptimization.s
@@ -0,0 +1,31 @@
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: llvm-mc -triple=x86_64-apple-macos10.9 -filetype=obj \
+# RUN:   -o %t/helper.o %S/Inputs/MachO_GOTAndStubsOptimizationHelper.s
+# RUN: llvm-mc -triple=x86_64-apple-macos10.9 -filetype=obj \
+# RUN:   -o %t/testcase.o %s
+# RUN: llvm-jitlink -noexec -slab-allocate 64Kb -entry=bypass_stub -check %s \
+# RUN:   %t/testcase.o %t/helper.o
+#
+# Test that references to in-range GOT and stub targets can be bypassed.
+# The helper file contains a function that uses the GOT for _x, and this file
+# contains an external call to that function. By slab allocating the JIT memory
+# we can ensure that the references and targets will be in-range of one another,
+# which should cause both the GOT load and stub to be bypassed.
+
+        .section	__TEXT,__text,regular,pure_instructions
+	.macosx_version_min 10, 14
+	.globl bypass_stub
+	.p2align	4, 0x90
+
+# jitlink-check: decode_operand(bypass_got, 4) = _x - next_pc(bypass_got)
+# jitlink-check: decode_operand(bypass_stub, 0) = bypass_got - next_pc(bypass_stub)
+bypass_stub:
+	callq	bypass_got
+
+	.section	__DATA,__data
+	.globl	_x
+	.p2align	2
+_x:
+	.long	42
+
+.subsections_via_symbols

diff  --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s
index 87ed036556e1..b128d6fd9be9 100644
--- a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s
+++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s
@@ -1,6 +1,8 @@
 # RUN: rm -rf %t && mkdir -p %t
 # RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj -o %t/macho_reloc.o %s
-# RUN: llvm-jitlink -noexec -define-abs external_data=0xdeadbeef -define-abs external_func=0xcafef00d -check=%s %t/macho_reloc.o
+# RUN: llvm-jitlink -noexec -define-abs external_data=0xffffffffdeadbeef \
+# RUN:    -define-abs external_func=0xffffffffcafef00d \
+# RUN:    -define-abs lowaddr_symbol=0x1000 -check=%s %t/macho_reloc.o
 
         .section        __TEXT,__text,regular,pure_instructions
 
@@ -170,11 +172,11 @@ named_func_addr_quad:
 # Check X86_64_RELOC_UNSIGNED / long / extern handling by putting the address of
 # an external function (defined to reside in the low 4Gb) into a long symbol.
 #
-# jitlink-check: *{4}named_func_addr_long = external_func
-        .globl  named_func_addr_long
+# jitlink-check: *{4}named_lowaddr_symbol_long = lowaddr_symbol
+        .globl  named_lowaddr_symbol_long
         .p2align  2
-named_func_addr_long:
-        .long   external_func
+named_lowaddr_symbol_long:
+        .long   lowaddr_symbol
 
 # Check X86_64_RELOC_UNSIGNED / quad / non-extern handling by putting the
 # address of a local anonymous function into a quad symbol.


        


More information about the llvm-commits mailing list