[llvm] r369298 - Recommit "[llvm-objcopy][MachO] Support load commands used in executables/shared libraries"

Seiya Nuta via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 19 14:05:31 PDT 2019


Author: seiya
Date: Mon Aug 19 14:05:31 2019
New Revision: 369298

URL: http://llvm.org/viewvc/llvm-project?rev=369298&view=rev
Log:
Recommit "[llvm-objcopy][MachO] Support load commands used in executables/shared libraries"

Summary:
This patch implements copying some load commands that appear in executables/shared libraries such as the indirect symbol table.

I intentionally don't add tests because this patch is incomplete: we still need a layout algorithm for executables/shared libraries. I'll submit that as a separate patch with tests.

Reviewers: alexshap, rupprecht, jhenderson, compnerd

Reviewed By: alexshap

Subscribers: abrachet, mgorny, mgrang, MaskRay, mtrent, jakehehrlich, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63395

Added:
    llvm/trunk/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
    llvm/trunk/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h
Modified:
    llvm/trunk/tools/llvm-objcopy/CMakeLists.txt
    llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
    llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp
    llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.h
    llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp
    llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h
    llvm/trunk/tools/llvm-objcopy/MachO/Object.h

Modified: llvm/trunk/tools/llvm-objcopy/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/CMakeLists.txt?rev=369298&r1=369297&r2=369298&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/CMakeLists.txt (original)
+++ llvm/trunk/tools/llvm-objcopy/CMakeLists.txt Mon Aug 19 14:05:31 2019
@@ -26,6 +26,7 @@ add_llvm_tool(llvm-objcopy
   MachO/MachOObjcopy.cpp
   MachO/MachOReader.cpp
   MachO/MachOWriter.cpp
+  MachO/MachOLayoutBuilder.cpp
   MachO/Object.cpp
   DEPENDS
   ObjcopyOptsTableGen

Added: llvm/trunk/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp?rev=369298&view=auto
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp (added)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp Mon Aug 19 14:05:31 2019
@@ -0,0 +1,322 @@
+//===- MachOLayoutBuilder.cpp -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOLayoutBuilder.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+uint32_t MachOLayoutBuilder::computeSizeOfCmds() const {
+  uint32_t Size = 0;
+  for (const auto &LC : O.LoadCommands) {
+    auto &MLC = LC.MachOLoadCommand;
+    auto cmd = MLC.load_command_data.cmd;
+    switch (cmd) {
+    case MachO::LC_SEGMENT:
+      Size += sizeof(MachO::segment_command) +
+              sizeof(MachO::section) * LC.Sections.size();
+      continue;
+    case MachO::LC_SEGMENT_64:
+      Size += sizeof(MachO::segment_command_64) +
+              sizeof(MachO::section_64) * LC.Sections.size();
+      continue;
+    }
+
+    switch (cmd) {
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
+  case MachO::LCName:                                                          \
+    Size += sizeof(MachO::LCStruct) + LC.Payload.size();                       \
+    break;
+#include "llvm/BinaryFormat/MachO.def"
+#undef HANDLE_LOAD_COMMAND
+    }
+  }
+
+  return Size;
+}
+
+void MachOLayoutBuilder::constructStringTable() {
+  for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
+    StrTableBuilder.add(Sym->Name);
+  StrTableBuilder.finalize();
+}
+
+void MachOLayoutBuilder::updateSymbolIndexes() {
+  uint32_t Index = 0;
+  for (auto &Symbol : O.SymTable.Symbols)
+    Symbol->Index = Index++;
+}
+
+// Updates the index and the number of local/external/undefined symbols.
+void MachOLayoutBuilder::updateDySymTab(MachO::macho_load_command &MLC) {
+  assert(MLC.load_command_data.cmd == MachO::LC_DYSYMTAB);
+  // Make sure that nlist entries in the symbol table are sorted by their
+  // types. The order is: local < defined external < undefined external.
+  assert(std::is_sorted(O.SymTable.Symbols.begin(), O.SymTable.Symbols.end(),
+                        [](const std::unique_ptr<SymbolEntry> &A,
+                           const std::unique_ptr<SymbolEntry> &B) {
+                          return (A->isLocalSymbol() && !B->isLocalSymbol()) ||
+                                 (!A->isUndefinedSymbol() &&
+                                  B->isUndefinedSymbol());
+                        }) &&
+         "Symbols are not sorted by their types.");
+
+  uint32_t NumLocalSymbols = 0;
+  auto Iter = O.SymTable.Symbols.begin();
+  auto End = O.SymTable.Symbols.end();
+  for (; Iter != End; ++Iter) {
+    if ((*Iter)->isExternalSymbol())
+      break;
+
+    ++NumLocalSymbols;
+  }
+
+  uint32_t NumExtDefSymbols = 0;
+  for (; Iter != End; ++Iter) {
+    if ((*Iter)->isUndefinedSymbol())
+      break;
+
+    ++NumExtDefSymbols;
+  }
+
+  MLC.dysymtab_command_data.ilocalsym = 0;
+  MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
+  MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
+  MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
+  MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
+  MLC.dysymtab_command_data.nundefsym =
+      O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
+}
+
+// Recomputes and updates offset and size fields in load commands and sections
+// since they could be modified.
+uint64_t MachOLayoutBuilder::layoutSegments() {
+  auto HeaderSize =
+      Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
+  auto Offset = HeaderSize + O.Header.SizeOfCmds;
+
+  // Lay out sections.
+  for (auto &LC : O.LoadCommands) {
+    uint64_t FileOff = Offset;
+    auto &MLC = LC.MachOLoadCommand;
+    StringRef Segname;
+    switch (MLC.load_command_data.cmd) {
+    case MachO::LC_SEGMENT:
+      Segname = StringRef(MLC.segment_command_data.segname,
+                          strnlen(MLC.segment_command_data.segname,
+                                  sizeof(MLC.segment_command_data.segname)));
+      break;
+    case MachO::LC_SEGMENT_64:
+      Segname = StringRef(MLC.segment_command_64_data.segname,
+                          strnlen(MLC.segment_command_64_data.segname,
+                                  sizeof(MLC.segment_command_64_data.segname)));
+      break;
+    default:
+      continue;
+    }
+
+    if (Segname == "__LINKEDIT") {
+      // We update the __LINKEDIT segment later (in layoutTail).
+      assert(LC.Sections.empty() && "__LINKEDIT segment has sections");
+      LinkEditLoadCommand = &MLC;
+      continue;
+    }
+
+    // Update file offsets and sizes of sections.
+    uint64_t VMSize = 0;
+    uint64_t FileOffsetInSegment = 0;
+    for (auto &Sec : LC.Sections) {
+      if (!Sec.isVirtualSection()) {
+        auto FilePaddingSize =
+            OffsetToAlignment(FileOffsetInSegment, 1ull << Sec.Align);
+        Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize;
+        Sec.Size = Sec.Content.size();
+        FileOffsetInSegment += FilePaddingSize + Sec.Size;
+      }
+
+      VMSize = std::max(VMSize, Sec.Addr + Sec.Size);
+    }
+
+    // TODO: Handle the __PAGEZERO segment.
+    switch (MLC.load_command_data.cmd) {
+    case MachO::LC_SEGMENT:
+      MLC.segment_command_data.cmdsize =
+          sizeof(MachO::segment_command) +
+          sizeof(MachO::section) * LC.Sections.size();
+      MLC.segment_command_data.nsects = LC.Sections.size();
+      MLC.segment_command_data.fileoff = FileOff;
+      MLC.segment_command_data.vmsize = VMSize;
+      MLC.segment_command_data.filesize = FileOffsetInSegment;
+      break;
+    case MachO::LC_SEGMENT_64:
+      MLC.segment_command_64_data.cmdsize =
+          sizeof(MachO::segment_command_64) +
+          sizeof(MachO::section_64) * LC.Sections.size();
+      MLC.segment_command_64_data.nsects = LC.Sections.size();
+      MLC.segment_command_64_data.fileoff = FileOff;
+      MLC.segment_command_64_data.vmsize = VMSize;
+      MLC.segment_command_64_data.filesize = FileOffsetInSegment;
+      break;
+    }
+
+    Offset += FileOffsetInSegment;
+  }
+
+  return Offset;
+}
+
+uint64_t MachOLayoutBuilder::layoutRelocations(uint64_t Offset) {
+  for (auto &LC : O.LoadCommands)
+    for (auto &Sec : LC.Sections) {
+      Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset;
+      Sec.NReloc = Sec.Relocations.size();
+      Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc;
+    }
+
+  return Offset;
+}
+
+Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
+  // The order of LINKEDIT elements is as follows:
+  // rebase info, binding info, weak binding info, lazy binding info, export
+  // trie, function starts, data-in-code, symbol table, indirect symbol table,
+  // symbol table strings.
+  uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
+  uint64_t StartOfLinkEdit = Offset;
+  uint64_t StartOfRebaseInfo = StartOfLinkEdit;
+  uint64_t StartOfBindingInfo = StartOfRebaseInfo + O.Rebases.Opcodes.size();
+  uint64_t StartOfWeakBindingInfo = StartOfBindingInfo + O.Binds.Opcodes.size();
+  uint64_t StartOfLazyBindingInfo =
+      StartOfWeakBindingInfo + O.WeakBinds.Opcodes.size();
+  uint64_t StartOfExportTrie =
+      StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size();
+  uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size();
+  uint64_t StartOfDataInCode =
+      StartOfFunctionStarts + O.FunctionStarts.Data.size();
+  uint64_t StartOfSymbols = StartOfDataInCode + O.DataInCode.Data.size();
+  uint64_t StartOfIndirectSymbols =
+      StartOfSymbols + NListSize * O.SymTable.Symbols.size();
+  uint64_t StartOfSymbolStrings =
+      StartOfIndirectSymbols +
+      sizeof(uint32_t) * O.IndirectSymTable.Symbols.size();
+  uint64_t LinkEditSize =
+      (StartOfSymbolStrings + StrTableBuilder.getSize()) - StartOfLinkEdit;
+
+  // Now we have determined the layout of the contents of the __LINKEDIT
+  // segment. Update its load command.
+  if (LinkEditLoadCommand) {
+    MachO::macho_load_command *MLC = LinkEditLoadCommand;
+    switch (LinkEditLoadCommand->load_command_data.cmd) {
+    case MachO::LC_SEGMENT:
+      MLC->segment_command_data.cmdsize = sizeof(MachO::segment_command);
+      MLC->segment_command_data.fileoff = StartOfLinkEdit;
+      MLC->segment_command_data.vmsize = alignTo(LinkEditSize, PageSize);
+      MLC->segment_command_data.filesize = LinkEditSize;
+      break;
+    case MachO::LC_SEGMENT_64:
+      MLC->segment_command_64_data.cmdsize = sizeof(MachO::segment_command_64);
+      MLC->segment_command_64_data.fileoff = StartOfLinkEdit;
+      MLC->segment_command_64_data.vmsize = alignTo(LinkEditSize, PageSize);
+      MLC->segment_command_64_data.filesize = LinkEditSize;
+      break;
+    }
+  }
+
+  for (auto &LC : O.LoadCommands) {
+    auto &MLC = LC.MachOLoadCommand;
+    auto cmd = MLC.load_command_data.cmd;
+    switch (cmd) {
+    case MachO::LC_SYMTAB:
+      MLC.symtab_command_data.symoff = StartOfSymbols;
+      MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
+      MLC.symtab_command_data.stroff = StartOfSymbolStrings;
+      MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
+      break;
+    case MachO::LC_DYSYMTAB: {
+      if (MLC.dysymtab_command_data.ntoc != 0 ||
+          MLC.dysymtab_command_data.nmodtab != 0 ||
+          MLC.dysymtab_command_data.nextrefsyms != 0 ||
+          MLC.dysymtab_command_data.nlocrel != 0 ||
+          MLC.dysymtab_command_data.nextrel != 0)
+        return createStringError(llvm::errc::not_supported,
+                                 "shared library is not yet supported");
+
+      if (!O.IndirectSymTable.Symbols.empty()) {
+        MLC.dysymtab_command_data.indirectsymoff = StartOfIndirectSymbols;
+        MLC.dysymtab_command_data.nindirectsyms =
+            O.IndirectSymTable.Symbols.size();
+      }
+
+      updateDySymTab(MLC);
+      break;
+    }
+    case MachO::LC_DATA_IN_CODE:
+      MLC.linkedit_data_command_data.dataoff = StartOfDataInCode;
+      MLC.linkedit_data_command_data.datasize = O.DataInCode.Data.size();
+      break;
+    case MachO::LC_FUNCTION_STARTS:
+      MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
+      MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
+      break;
+    case MachO::LC_DYLD_INFO:
+    case MachO::LC_DYLD_INFO_ONLY:
+      MLC.dyld_info_command_data.rebase_off =
+          O.Rebases.Opcodes.empty() ? 0 : StartOfRebaseInfo;
+      MLC.dyld_info_command_data.rebase_size = O.Rebases.Opcodes.size();
+      MLC.dyld_info_command_data.bind_off =
+          O.Binds.Opcodes.empty() ? 0 : StartOfBindingInfo;
+      MLC.dyld_info_command_data.bind_size = O.Binds.Opcodes.size();
+      MLC.dyld_info_command_data.weak_bind_off =
+          O.WeakBinds.Opcodes.empty() ? 0 : StartOfWeakBindingInfo;
+      MLC.dyld_info_command_data.weak_bind_size = O.WeakBinds.Opcodes.size();
+      MLC.dyld_info_command_data.lazy_bind_off =
+          O.LazyBinds.Opcodes.empty() ? 0 : StartOfLazyBindingInfo;
+      MLC.dyld_info_command_data.lazy_bind_size = O.LazyBinds.Opcodes.size();
+      MLC.dyld_info_command_data.export_off =
+          O.Exports.Trie.empty() ? 0 : StartOfExportTrie;
+      MLC.dyld_info_command_data.export_size = O.Exports.Trie.size();
+      break;
+    case MachO::LC_LOAD_DYLINKER:
+    case MachO::LC_MAIN:
+    case MachO::LC_RPATH:
+    case MachO::LC_SEGMENT:
+    case MachO::LC_SEGMENT_64:
+    case MachO::LC_VERSION_MIN_MACOSX:
+    case MachO::LC_BUILD_VERSION:
+    case MachO::LC_ID_DYLIB:
+    case MachO::LC_LOAD_DYLIB:
+    case MachO::LC_UUID:
+    case MachO::LC_SOURCE_VERSION:
+      // Nothing to update.
+      break;
+    default:
+      // Abort if it's unsupported in order to prevent corrupting the object.
+      return createStringError(llvm::errc::not_supported,
+                               "unsupported load command (cmd=0x%x)", cmd);
+    }
+  }
+
+  return Error::success();
+}
+
+Error MachOLayoutBuilder::layout() {
+  O.Header.NCmds = O.LoadCommands.size();
+  O.Header.SizeOfCmds = computeSizeOfCmds();
+  constructStringTable();
+  updateSymbolIndexes();
+  uint64_t Offset = layoutSegments();
+  Offset = layoutRelocations(Offset);
+  return layoutTail(Offset);
+}
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm

Added: llvm/trunk/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h?rev=369298&view=auto
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h (added)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h Mon Aug 19 14:05:31 2019
@@ -0,0 +1,50 @@
+//===- MachOLayoutBuilder.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
+#define LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H
+
+#include "MachOObjcopy.h"
+#include "Object.h"
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+class MachOLayoutBuilder {
+  Object &O;
+  bool Is64Bit;
+  uint64_t PageSize;
+
+  // Points to the __LINKEDIT segment if it exists.
+  MachO::macho_load_command *LinkEditLoadCommand = nullptr;
+  StringTableBuilder StrTableBuilder{StringTableBuilder::MachO};
+
+  uint32_t computeSizeOfCmds() const;
+  void constructStringTable();
+  void updateSymbolIndexes();
+  void updateDySymTab(MachO::macho_load_command &MLC);
+  uint64_t layoutSegments();
+  uint64_t layoutRelocations(uint64_t Offset);
+  Error layoutTail(uint64_t Offset);
+
+public:
+  MachOLayoutBuilder(Object &O, bool Is64Bit, uint64_t PageSize)
+      : O(O), Is64Bit(Is64Bit), PageSize(PageSize) {}
+
+  // Recomputes and updates fields in the given object such as file offsets.
+  Error layout();
+
+  StringTableBuilder &getStringTableBuilder() { return StrTableBuilder; }
+};
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm
+
+#endif // LLVM_OBJCOPY_MACHO_MACHOLAYOUTBUILDER_H

Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp?rev=369298&r1=369297&r2=369298&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp Mon Aug 19 14:05:31 2019
@@ -57,7 +57,11 @@ Error executeObjcopyOnBinary(const CopyC
   if (Error E = handleArgs(Config, *O))
     return createFileError(Config.InputFilename, std::move(E));
 
-  MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out);
+  // TODO: Support 16KB pages which are employed in iOS arm64 binaries:
+  //       https://github.com/llvm/llvm-project/commit/1bebb2832ee312d3b0316dacff457a7a29435edb
+  const uint64_t PageSize = 4096;
+
+  MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), PageSize, Out);
   if (auto E = Writer.finalize())
     return E;
   return Writer.write();

Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp?rev=369298&r1=369297&r2=369298&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp Mon Aug 19 14:05:31 2019
@@ -129,10 +129,19 @@ void MachOReader::readLoadCommands(Objec
     case MachO::LC_SYMTAB:
       O.SymTabCommandIndex = O.LoadCommands.size();
       break;
+    case MachO::LC_DYSYMTAB:
+      O.DySymTabCommandIndex = O.LoadCommands.size();
+      break;
     case MachO::LC_DYLD_INFO:
     case MachO::LC_DYLD_INFO_ONLY:
       O.DyLdInfoCommandIndex = O.LoadCommands.size();
       break;
+    case MachO::LC_DATA_IN_CODE:
+      O.DataInCodeCommandIndex = O.LoadCommands.size();
+      break;
+    case MachO::LC_FUNCTION_STARTS:
+      O.FunctionStartsCommandIndex = O.LoadCommands.size();
+      break;
     }
 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
   case MachO::LCName:                                                          \
@@ -222,6 +231,35 @@ void MachOReader::readExportInfo(Object
   O.Exports.Trie = MachOObj.getDyldInfoExportsTrie();
 }
 
+void MachOReader::readDataInCodeData(Object &O) const {
+  if (!O.DataInCodeCommandIndex)
+    return;
+  const MachO::linkedit_data_command &LDC =
+      O.LoadCommands[*O.DataInCodeCommandIndex]
+          .MachOLoadCommand.linkedit_data_command_data;
+
+  O.DataInCode.Data = arrayRefFromStringRef(
+      MachOObj.getData().substr(LDC.dataoff, LDC.datasize));
+}
+
+void MachOReader::readFunctionStartsData(Object &O) const {
+  if (!O.FunctionStartsCommandIndex)
+    return;
+  const MachO::linkedit_data_command &LDC =
+      O.LoadCommands[*O.FunctionStartsCommandIndex]
+          .MachOLoadCommand.linkedit_data_command_data;
+
+  O.FunctionStarts.Data = arrayRefFromStringRef(
+      MachOObj.getData().substr(LDC.dataoff, LDC.datasize));
+}
+
+void MachOReader::readIndirectSymbolTable(Object &O) const {
+  MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
+  for (uint32_t i = 0; i < DySymTab.nindirectsyms; ++i)
+    O.IndirectSymTable.Symbols.push_back(
+        MachOObj.getIndirectSymbolTableEntry(DySymTab, i));
+}
+
 std::unique_ptr<Object> MachOReader::create() const {
   auto Obj = std::make_unique<Object>();
   readHeader(*Obj);
@@ -233,6 +271,9 @@ std::unique_ptr<Object> MachOReader::cre
   readWeakBindInfo(*Obj);
   readLazyBindInfo(*Obj);
   readExportInfo(*Obj);
+  readDataInCodeData(*Obj);
+  readFunctionStartsData(*Obj);
+  readIndirectSymbolTable(*Obj);
   return Obj;
 }
 

Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.h?rev=369298&r1=369297&r2=369298&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.h (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.h Mon Aug 19 14:05:31 2019
@@ -36,6 +36,9 @@ class MachOReader : public Reader {
   void readWeakBindInfo(Object &O) const;
   void readLazyBindInfo(Object &O) const;
   void readExportInfo(Object &O) const;
+  void readDataInCodeData(Object &O) const;
+  void readFunctionStartsData(Object &O) const;
+  void readIndirectSymbolTable(Object &O) const;
 
 public:
   explicit MachOReader(const object::MachOObjectFile &Obj) : MachOObj(Obj) {}

Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp?rev=369298&r1=369297&r2=369298&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp Mon Aug 19 14:05:31 2019
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MachOWriter.h"
+#include "MachOLayoutBuilder.h"
 #include "Object.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/BinaryFormat/MachO.h"
@@ -40,16 +41,10 @@ size_t MachOWriter::totalSize() const {
     const MachO::symtab_command &SymTabCommand =
         O.LoadCommands[*O.SymTabCommandIndex]
             .MachOLoadCommand.symtab_command_data;
-    if (SymTabCommand.symoff) {
-      assert((SymTabCommand.nsyms == O.SymTable.Symbols.size()) &&
-             "Incorrect number of symbols");
+    if (SymTabCommand.symoff)
       Ends.push_back(SymTabCommand.symoff + symTableSize());
-    }
-    if (SymTabCommand.stroff) {
-      assert((SymTabCommand.strsize == StrTableBuilder.getSize()) &&
-             "Incorrect string table size");
+    if (SymTabCommand.stroff)
       Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize);
-    }
   }
   if (O.DyLdInfoCommandIndex) {
     const MachO::dyld_info_command &DyLdInfoCommand =
@@ -84,6 +79,36 @@ size_t MachOWriter::totalSize() const {
     }
   }
 
+  if (O.DySymTabCommandIndex) {
+    const MachO::dysymtab_command &DySymTabCommand =
+        O.LoadCommands[*O.DySymTabCommandIndex]
+            .MachOLoadCommand.dysymtab_command_data;
+
+    if (DySymTabCommand.indirectsymoff)
+      Ends.push_back(DySymTabCommand.indirectsymoff +
+                     sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
+  }
+
+  if (O.DataInCodeCommandIndex) {
+    const MachO::linkedit_data_command &LinkEditDataCommand =
+        O.LoadCommands[*O.DataInCodeCommandIndex]
+            .MachOLoadCommand.linkedit_data_command_data;
+
+    if (LinkEditDataCommand.dataoff)
+      Ends.push_back(LinkEditDataCommand.dataoff +
+                     LinkEditDataCommand.datasize);
+  }
+
+  if (O.FunctionStartsCommandIndex) {
+    const MachO::linkedit_data_command &LinkEditDataCommand =
+        O.LoadCommands[*O.FunctionStartsCommandIndex]
+            .MachOLoadCommand.linkedit_data_command_data;
+
+    if (LinkEditDataCommand.dataoff)
+      Ends.push_back(LinkEditDataCommand.dataoff +
+                     LinkEditDataCommand.datasize);
+  }
+
   // Otherwise, use the last section / reloction.
   for (const auto &LC : O.LoadCommands)
     for (const auto &S : LC.Sections) {
@@ -120,14 +145,6 @@ void MachOWriter::writeHeader() {
   memcpy(B.getBufferStart(), &Header, HeaderSize);
 }
 
-void MachOWriter::updateSymbolIndexes() {
-  uint32_t Index = 0;
-  for (auto &Symbol : O.SymTable.Symbols) {
-    Symbol->Index = Index;
-    Index++;
-  }
-}
-
 void MachOWriter::writeLoadCommands() {
   uint8_t *Begin = B.getBufferStart() + headerSize();
   for (const auto &LC : O.LoadCommands) {
@@ -261,7 +278,7 @@ void MachOWriter::writeSymbolTable() {
           .MachOLoadCommand.symtab_command_data;
 
   uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff;
-  StrTableBuilder.write(StrTable);
+  LayoutBuilder.getStringTableBuilder().write(StrTable);
 }
 
 void MachOWriter::writeStringTable() {
@@ -275,7 +292,7 @@ void MachOWriter::writeStringTable() {
   for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end();
        Iter != End; Iter++) {
     SymbolEntry *Sym = Iter->get();
-    auto Nstrx = StrTableBuilder.getOffset(Sym->Name);
+    uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name);
 
     if (Is64Bit)
       writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx);
@@ -344,6 +361,45 @@ void MachOWriter::writeExportInfo() {
   memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size());
 }
 
+void MachOWriter::writeIndirectSymbolTable() {
+  if (!O.DySymTabCommandIndex)
+    return;
+
+  const MachO::dysymtab_command &DySymTabCommand =
+      O.LoadCommands[*O.DySymTabCommandIndex]
+          .MachOLoadCommand.dysymtab_command_data;
+
+  char *Out = (char *)B.getBufferStart() + DySymTabCommand.indirectsymoff;
+  assert((DySymTabCommand.nindirectsyms == O.IndirectSymTable.Symbols.size()) &&
+         "Incorrect indirect symbol table size");
+  memcpy(Out, O.IndirectSymTable.Symbols.data(),
+         sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
+}
+
+void MachOWriter::writeDataInCodeData() {
+  if (!O.DataInCodeCommandIndex)
+    return;
+  const MachO::linkedit_data_command &LinkEditDataCommand =
+      O.LoadCommands[*O.DataInCodeCommandIndex]
+          .MachOLoadCommand.linkedit_data_command_data;
+  char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
+  assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) &&
+         "Incorrect data in code data size");
+  memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size());
+}
+
+void MachOWriter::writeFunctionStartsData() {
+  if (!O.FunctionStartsCommandIndex)
+    return;
+  const MachO::linkedit_data_command &LinkEditDataCommand =
+      O.LoadCommands[*O.FunctionStartsCommandIndex]
+          .MachOLoadCommand.linkedit_data_command_data;
+  char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
+  assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) &&
+         "Incorrect function starts data size");
+  memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size());
+}
+
 void MachOWriter::writeTail() {
   typedef void (MachOWriter::*WriteHandlerType)(void);
   typedef std::pair<uint64_t, WriteHandlerType> WriteOperation;
@@ -379,6 +435,36 @@ void MachOWriter::writeTail() {
           {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo});
   }
 
+  if (O.DySymTabCommandIndex) {
+    const MachO::dysymtab_command &DySymTabCommand =
+        O.LoadCommands[*O.DySymTabCommandIndex]
+            .MachOLoadCommand.dysymtab_command_data;
+
+    if (DySymTabCommand.indirectsymoff)
+      Queue.emplace_back(DySymTabCommand.indirectsymoff,
+                         &MachOWriter::writeIndirectSymbolTable);
+  }
+
+  if (O.DataInCodeCommandIndex) {
+    const MachO::linkedit_data_command &LinkEditDataCommand =
+        O.LoadCommands[*O.DataInCodeCommandIndex]
+            .MachOLoadCommand.linkedit_data_command_data;
+
+    if (LinkEditDataCommand.dataoff)
+      Queue.emplace_back(LinkEditDataCommand.dataoff,
+                         &MachOWriter::writeDataInCodeData);
+  }
+
+  if (O.FunctionStartsCommandIndex) {
+    const MachO::linkedit_data_command &LinkEditDataCommand =
+        O.LoadCommands[*O.FunctionStartsCommandIndex]
+            .MachOLoadCommand.linkedit_data_command_data;
+
+    if (LinkEditDataCommand.dataoff)
+      Queue.emplace_back(LinkEditDataCommand.dataoff,
+                         &MachOWriter::writeFunctionStartsData);
+  }
+
   llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
     return LHS.first < RHS.first;
   });
@@ -387,198 +473,13 @@ void MachOWriter::writeTail() {
     (this->*WriteOp.second)();
 }
 
-void MachOWriter::updateSizeOfCmds() {
-  auto Size = 0;
-  for (const auto &LC : O.LoadCommands) {
-    auto &MLC = LC.MachOLoadCommand;
-    auto cmd = MLC.load_command_data.cmd;
-
-    switch (cmd) {
-    case MachO::LC_SEGMENT:
-      Size += sizeof(MachO::segment_command) +
-              sizeof(MachO::section) * LC.Sections.size();
-      continue;
-    case MachO::LC_SEGMENT_64:
-      Size += sizeof(MachO::segment_command_64) +
-              sizeof(MachO::section_64) * LC.Sections.size();
-      continue;
-    }
-
-    switch (cmd) {
-#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
-  case MachO::LCName:                                                          \
-    Size += sizeof(MachO::LCStruct);                                           \
-    break;
-#include "llvm/BinaryFormat/MachO.def"
-#undef HANDLE_LOAD_COMMAND
-    }
-  }
-
-  O.Header.SizeOfCmds = Size;
-}
-
-// Updates the index and the number of local/external/undefined symbols. Here we
-// assume that MLC is a LC_DYSYMTAB and the nlist entries in the symbol table
-// are already sorted by the those types.
-void MachOWriter::updateDySymTab(MachO::macho_load_command &MLC) {
-  uint32_t NumLocalSymbols = 0;
-  auto Iter = O.SymTable.Symbols.begin();
-  auto End = O.SymTable.Symbols.end();
-  for (; Iter != End; Iter++) {
-    if ((*Iter)->n_type & (MachO::N_EXT | MachO::N_PEXT))
-      break;
-
-    NumLocalSymbols++;
-  }
-
-  uint32_t NumExtDefSymbols = 0;
-  for (; Iter != End; Iter++) {
-    if (((*Iter)->n_type & MachO::N_TYPE) == MachO::N_UNDF)
-      break;
-
-    NumExtDefSymbols++;
-  }
-
-  MLC.dysymtab_command_data.ilocalsym = 0;
-  MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
-  MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
-  MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
-  MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
-  MLC.dysymtab_command_data.nundefsym =
-      O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
-}
-
-// Recomputes and updates offset and size fields in load commands and sections
-// since they could be modified.
-Error MachOWriter::layout() {
-  auto SizeOfCmds = loadCommandsSize();
-  auto Offset = headerSize() + SizeOfCmds;
-  O.Header.NCmds = O.LoadCommands.size();
-  O.Header.SizeOfCmds = SizeOfCmds;
-
-  // Lay out sections.
-  for (auto &LC : O.LoadCommands) {
-    uint64_t FileOff = Offset;
-    uint64_t VMSize = 0;
-    uint64_t FileOffsetInSegment = 0;
-    for (auto &Sec : LC.Sections) {
-      if (!Sec.isVirtualSection()) {
-        auto FilePaddingSize =
-            OffsetToAlignment(FileOffsetInSegment, 1ull << Sec.Align);
-        Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize;
-        Sec.Size = Sec.Content.size();
-        FileOffsetInSegment += FilePaddingSize + Sec.Size;
-      }
-
-      VMSize = std::max(VMSize, Sec.Addr + Sec.Size);
-    }
-
-    // TODO: Handle the __PAGEZERO segment.
-    auto &MLC = LC.MachOLoadCommand;
-    switch (MLC.load_command_data.cmd) {
-    case MachO::LC_SEGMENT:
-      MLC.segment_command_data.cmdsize =
-          sizeof(MachO::segment_command) +
-          sizeof(MachO::section) * LC.Sections.size();
-      MLC.segment_command_data.nsects = LC.Sections.size();
-      MLC.segment_command_data.fileoff = FileOff;
-      MLC.segment_command_data.vmsize = VMSize;
-      MLC.segment_command_data.filesize = FileOffsetInSegment;
-      break;
-    case MachO::LC_SEGMENT_64:
-      MLC.segment_command_64_data.cmdsize =
-          sizeof(MachO::segment_command_64) +
-          sizeof(MachO::section_64) * LC.Sections.size();
-      MLC.segment_command_64_data.nsects = LC.Sections.size();
-      MLC.segment_command_64_data.fileoff = FileOff;
-      MLC.segment_command_64_data.vmsize = VMSize;
-      MLC.segment_command_64_data.filesize = FileOffsetInSegment;
-      break;
-    }
-
-    Offset += FileOffsetInSegment;
-  }
-
-  // Lay out relocations.
-  for (auto &LC : O.LoadCommands)
-    for (auto &Sec : LC.Sections) {
-      Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset;
-      Sec.NReloc = Sec.Relocations.size();
-      Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc;
-    }
-
-  // Lay out tail stuff.
-  auto NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
-  for (auto &LC : O.LoadCommands) {
-    auto &MLC = LC.MachOLoadCommand;
-    auto cmd = MLC.load_command_data.cmd;
-    switch (cmd) {
-    case MachO::LC_SYMTAB:
-      MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
-      MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
-      MLC.symtab_command_data.symoff = Offset;
-      Offset += NListSize * MLC.symtab_command_data.nsyms;
-      MLC.symtab_command_data.stroff = Offset;
-      Offset += MLC.symtab_command_data.strsize;
-      break;
-    case MachO::LC_DYSYMTAB: {
-      if (MLC.dysymtab_command_data.ntoc != 0 ||
-          MLC.dysymtab_command_data.nmodtab != 0 ||
-          MLC.dysymtab_command_data.nextrefsyms != 0 ||
-          MLC.dysymtab_command_data.nlocrel != 0 ||
-          MLC.dysymtab_command_data.nextrel != 0)
-        return createStringError(llvm::errc::not_supported,
-                                 "shared library is not yet supported");
-
-      if (MLC.dysymtab_command_data.nindirectsyms != 0)
-        return createStringError(llvm::errc::not_supported,
-                                 "indirect symbol table is not yet supported");
-
-      updateDySymTab(MLC);
-      break;
-    }
-    case MachO::LC_SEGMENT:
-    case MachO::LC_SEGMENT_64:
-    case MachO::LC_VERSION_MIN_MACOSX:
-    case MachO::LC_BUILD_VERSION:
-    case MachO::LC_ID_DYLIB:
-    case MachO::LC_LOAD_DYLIB:
-    case MachO::LC_UUID:
-    case MachO::LC_SOURCE_VERSION:
-      // Nothing to update.
-      break;
-    default:
-      // Abort if it's unsupported in order to prevent corrupting the object.
-      return createStringError(llvm::errc::not_supported,
-                               "unsupported load command (cmd=0x%x)", cmd);
-    }
-  }
-
-  return Error::success();
-}
-
-void MachOWriter::constructStringTable() {
-  for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
-    StrTableBuilder.add(Sym->Name);
-  StrTableBuilder.finalize();
-}
-
-Error MachOWriter::finalize() {
-  updateSizeOfCmds();
-  constructStringTable();
-
-  if (auto E = layout())
-    return E;
-
-  return Error::success();
-}
+Error MachOWriter::finalize() { return LayoutBuilder.layout(); }
 
 Error MachOWriter::write() {
   if (Error E = B.allocate(totalSize()))
     return E;
   memset(B.getBufferStart(), 0, totalSize());
   writeHeader();
-  updateSymbolIndexes();
   writeLoadCommands();
   writeSections();
   writeTail();

Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h?rev=369298&r1=369297&r2=369298&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h Mon Aug 19 14:05:31 2019
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "../Buffer.h"
+#include "MachOLayoutBuilder.h"
 #include "MachOObjcopy.h"
 #include "Object.h"
 #include "llvm/BinaryFormat/MachO.h"
@@ -22,20 +23,15 @@ class MachOWriter {
   Object &O;
   bool Is64Bit;
   bool IsLittleEndian;
+  uint64_t PageSize;
   Buffer &B;
-  StringTableBuilder StrTableBuilder{StringTableBuilder::MachO};
+  MachOLayoutBuilder LayoutBuilder;
 
   size_t headerSize() const;
   size_t loadCommandsSize() const;
   size_t symTableSize() const;
   size_t strTableSize() const;
 
-  void updateDySymTab(MachO::macho_load_command &MLC);
-  void updateSizeOfCmds();
-  void updateSymbolIndexes();
-  void constructStringTable();
-  Error layout();
-
   void writeHeader();
   void writeLoadCommands();
   template <typename StructType>
@@ -48,11 +44,16 @@ class MachOWriter {
   void writeWeakBindInfo();
   void writeLazyBindInfo();
   void writeExportInfo();
+  void writeIndirectSymbolTable();
+  void writeDataInCodeData();
+  void writeFunctionStartsData();
   void writeTail();
 
 public:
-  MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, Buffer &B)
-      : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), B(B) {}
+  MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, uint64_t PageSize,
+              Buffer &B)
+      : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian),
+        PageSize(PageSize), B(B), LayoutBuilder(O, Is64Bit, PageSize) {}
 
   size_t totalSize() const;
   Error finalize();

Modified: llvm/trunk/tools/llvm-objcopy/MachO/Object.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/Object.h?rev=369298&r1=369297&r2=369298&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/Object.h (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/Object.h Mon Aug 19 14:05:31 2019
@@ -90,6 +90,16 @@ struct SymbolEntry {
   uint8_t n_sect;
   uint16_t n_desc;
   uint64_t n_value;
+
+  bool isExternalSymbol() const {
+    return n_type & ((MachO::N_EXT | MachO::N_PEXT));
+  }
+
+  bool isLocalSymbol() const { return !isExternalSymbol(); }
+
+  bool isUndefinedSymbol() const {
+    return (n_type & MachO::N_TYPE) == MachO::N_UNDF;
+  }
 };
 
 /// The location of the symbol table inside the binary is described by LC_SYMTAB
@@ -100,6 +110,10 @@ struct SymbolTable {
   const SymbolEntry *getSymbolByIndex(uint32_t Index) const;
 };
 
+struct IndirectSymbolTable {
+  std::vector<uint32_t> Symbols;
+};
+
 /// The location of the string table inside the binary is described by LC_SYMTAB
 /// load command.
 struct StringTable {
@@ -206,6 +220,10 @@ struct ExportInfo {
   ArrayRef<uint8_t> Trie;
 };
 
+struct LinkData {
+  ArrayRef<uint8_t> Data;
+};
+
 struct Object {
   MachHeader Header;
   std::vector<LoadCommand> LoadCommands;
@@ -218,11 +236,20 @@ struct Object {
   WeakBindInfo WeakBinds;
   LazyBindInfo LazyBinds;
   ExportInfo Exports;
+  IndirectSymbolTable IndirectSymTable;
+  LinkData DataInCode;
+  LinkData FunctionStarts;
 
   /// The index of LC_SYMTAB load command if present.
   Optional<size_t> SymTabCommandIndex;
   /// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present.
   Optional<size_t> DyLdInfoCommandIndex;
+  /// The index of LC_DYSYMTAB load command if present.
+  Optional<size_t> DySymTabCommandIndex;
+  /// The index of LC_DATA_IN_CODE load command if present.
+  Optional<size_t> DataInCodeCommandIndex;
+  /// The index of LC_FUNCTION_STARTS load command if present.
+  Optional<size_t> FunctionStartsCommandIndex;
 };
 
 } // end namespace macho




More information about the llvm-commits mailing list