[lld] f2b1264 - [lld-macho] Use intermediate arrays to store opcodes

Vincent Lee via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 15 17:11:47 PDT 2021


Author: Vincent Lee
Date: 2021-07-15T16:57:45-07:00
New Revision: f2b1264141b02efb1ba76b6eb7a7d2fff6d4c21a

URL: https://github.com/llvm/llvm-project/commit/f2b1264141b02efb1ba76b6eb7a7d2fff6d4c21a
DIFF: https://github.com/llvm/llvm-project/commit/f2b1264141b02efb1ba76b6eb7a7d2fff6d4c21a.diff

LOG: [lld-macho] Use intermediate arrays to store opcodes

We want to incorporate some of ld64's optimization passes for bind opcodes.
This revision makes no functional changes; it only starts storing opcodes in intermediate
containers, in preparation for implementing the optimization passes in a follow-up revision.

Differential Revision: https://reviews.llvm.org/D105866

Added: 
    

Modified: 
    lld/MachO/SyntheticSections.cpp

Removed: 
    


################################################################################
diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 403da5608210..4b8d3149f9e7 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -277,6 +277,12 @@ struct Binding {
   uint64_t offset = 0;
   int64_t addend = 0;
 };
+struct BindIR {
+  // Default value of 0xF0 is not valid opcode and should make the program
+  // scream instead of accidentally writing "valid" values.
+  uint8_t opcode = 0xF0;
+  uint64_t data = 0;
+};
 } // namespace
 
 // Encode a sequence of opcodes that tell dyld to write the address of symbol +
@@ -287,32 +293,65 @@ struct Binding {
 // lastBinding.
 static void encodeBinding(const OutputSection *osec, uint64_t outSecOff,
                           int64_t addend, Binding &lastBinding,
-                          raw_svector_ostream &os) {
+                          std::vector<BindIR> &opcodes) {
   OutputSegment *seg = osec->parent;
   uint64_t offset = osec->getSegmentOffset() + outSecOff;
   if (lastBinding.segment != seg) {
-    os << static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
-                               seg->index);
-    encodeULEB128(offset, os);
+    BindIR op = {
+        static_cast<uint8_t>(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB |
+                             seg->index), // opcode
+        offset                            // data
+    };
+    opcodes.push_back(op);
     lastBinding.segment = seg;
     lastBinding.offset = offset;
   } else if (lastBinding.offset != offset) {
-    os << static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB);
-    encodeULEB128(offset - lastBinding.offset, os);
+    BindIR op = {
+        static_cast<uint8_t>(BIND_OPCODE_ADD_ADDR_ULEB), // opcode
+        offset - lastBinding.offset                      // data
+    };
+    opcodes.push_back(op);
     lastBinding.offset = offset;
   }
 
   if (lastBinding.addend != addend) {
-    os << static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB);
-    encodeSLEB128(addend, os);
+    BindIR op = {
+        static_cast<uint8_t>(BIND_OPCODE_SET_ADDEND_SLEB), // opcode
+        static_cast<uint64_t>(addend)                      // data
+    };
+    opcodes.push_back(op);
     lastBinding.addend = addend;
   }
 
-  os << static_cast<uint8_t>(BIND_OPCODE_DO_BIND);
+  BindIR op = {
+      static_cast<uint8_t>(BIND_OPCODE_DO_BIND), // opcode
+      0                                          // data
+  };
+  opcodes.push_back(op);
   // DO_BIND causes dyld to both perform the binding and increment the offset
   lastBinding.offset += target->wordSize;
 }
 
+static void flushOpcodes(const BindIR &op, raw_svector_ostream &os) {
+  uint8_t opcode = op.opcode & BIND_OPCODE_MASK;
+  switch (opcode) {
+  case BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
+  case BIND_OPCODE_ADD_ADDR_ULEB:
+    os << op.opcode;
+    encodeULEB128(op.data, os);
+    break;
+  case BIND_OPCODE_SET_ADDEND_SLEB:
+    os << op.opcode;
+    encodeSLEB128(static_cast<int64_t>(op.data), os);
+    break;
+  case BIND_OPCODE_DO_BIND:
+    os << op.opcode;
+    break;
+  default:
+    llvm_unreachable("cannot bind to an unrecognized symbol");
+  }
+}
+
 // Non-weak bindings need to have their dylib ordinal encoded as well.
 static int16_t ordinalForDylibSymbol(const DylibSymbol &dysym) {
   if (config->namespaceKind == NamespaceKind::flat || dysym.isDynamicLookup())
@@ -392,9 +431,6 @@ void BindingSection::finalizeContents() {
   for (auto &p : sortBindings(bindingsMap)) {
     const DylibSymbol *sym = p.first;
     std::vector<BindingEntry> &bindings = p.second;
-    llvm::sort(bindings, [](const BindingEntry &a, const BindingEntry &b) {
-      return a.target.getVA() < b.target.getVA();
-    });
     uint8_t flags = BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM;
     if (sym->isWeakRef())
       flags |= BIND_SYMBOL_FLAGS_WEAK_IMPORT;
@@ -405,10 +441,13 @@ void BindingSection::finalizeContents() {
       encodeDylibOrdinal(ordinal, os);
       lastOrdinal = ordinal;
     }
+    std::vector<BindIR> opcodes;
     for (const BindingEntry &b : bindings)
       encodeBinding(b.target.isec->parent,
                     b.target.isec->getOffset(b.target.offset), b.addend,
-                    lastBinding, os);
+                    lastBinding, opcodes);
+    for (const auto &op : opcodes)
+      flushOpcodes(op, os);
   }
   if (!bindingsMap.empty())
     os << static_cast<uint8_t>(BIND_OPCODE_DONE);
@@ -434,10 +473,13 @@ void WeakBindingSection::finalizeContents() {
     os << static_cast<uint8_t>(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM)
        << sym->getName() << '\0'
        << static_cast<uint8_t>(BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER);
+    std::vector<BindIR> opcodes;
     for (const BindingEntry &b : bindings)
       encodeBinding(b.target.isec->parent,
                     b.target.isec->getOffset(b.target.offset), b.addend,
-                    lastBinding, os);
+                    lastBinding, opcodes);
+    for (const auto &op : opcodes)
+      flushOpcodes(op, os);
   }
   if (!bindingsMap.empty() || !definitions.empty())
     os << static_cast<uint8_t>(BIND_OPCODE_DONE);


        


More information about the llvm-commits mailing list