[llvm] r362863 - [llvm-objcopy][MachO] Recompute and update offset/size fields in the writer

Seiya Nuta via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 7 18:22:55 PDT 2019


Author: seiya
Date: Fri Jun  7 18:22:54 2019
New Revision: 362863

URL: http://llvm.org/viewvc/llvm-project?rev=362863&view=rev
Log:
[llvm-objcopy][MachO] Recompute and update offset/size fields in the writer

Summary:
Recompute and update offset/size fields so that we can implement llvm-objcopy options like --only-section.

This patch is the first step and focuses on supporting load commands that are covered by existing tests; executable files and
dynamic libraries are not supported.

Reviewers: alexshap, rupprecht, jhenderson

Reviewed By: alexshap, rupprecht

Subscribers: compnerd, jakehehrlich, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62652

Added:
    llvm/trunk/test/tools/llvm-objcopy/MachO/Inputs/various-symbols.s
Modified:
    llvm/trunk/test/tools/llvm-objcopy/MachO/real-world-input-copy.test
    llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
    llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp
    llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp
    llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h
    llvm/trunk/tools/llvm-objcopy/MachO/Object.h

Added: llvm/trunk/test/tools/llvm-objcopy/MachO/Inputs/various-symbols.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-objcopy/MachO/Inputs/various-symbols.s?rev=362863&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-objcopy/MachO/Inputs/various-symbols.s (added)
+++ llvm/trunk/test/tools/llvm-objcopy/MachO/Inputs/various-symbols.s Fri Jun  7 18:22:54 2019
@@ -0,0 +1,23 @@
+# static int i; // A local symbol.
+# int f(void) { return i; } // An external symbol.
+
+	.section	__TEXT,__text,regular,pure_instructions
+	.build_version macos, 10, 14
+	.globl	_f                      ## -- Begin function f
+	.p2align	4, 0x90
+_f:                                     ## @f
+	.cfi_startproc
+## %bb.0:
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register %rbp
+	movl	_i(%rip), %eax
+	popq	%rbp
+	retq
+	.cfi_endproc
+                                        ## -- End function
+.zerofill __DATA,__bss,_i,4,2           ## @i
+
+.subsections_via_symbols

Modified: llvm/trunk/test/tools/llvm-objcopy/MachO/real-world-input-copy.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-objcopy/MachO/real-world-input-copy.test?rev=362863&r1=362862&r2=362863&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-objcopy/MachO/real-world-input-copy.test (original)
+++ llvm/trunk/test/tools/llvm-objcopy/MachO/real-world-input-copy.test Fri Jun  7 18:22:54 2019
@@ -6,4 +6,10 @@
 # RUN: llvm-objcopy %t.64.o %t.64.copy.o
 # RUN: cmp %t.64.o %t.64.copy.o
 
+# Make sure that it properly constructs LC_DYSYMTAB and handles virtual sections
+# (the bss section for example).
+# RUN: llvm-mc -assemble -triple x86_64-apple-darwin9 -filetype=obj %p/Inputs/various-symbols.s -o %t.various-symbols.o
+# RUN: llvm-objcopy %t.various-symbols.o %t.various-symbols.copy.o
+# RUN: cmp %t.various-symbols.o %t.various-symbols.copy.o
+
 # REQUIRES: x86-registered-target

Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp?rev=362863&r1=362862&r2=362863&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp Fri Jun  7 18:22:54 2019
@@ -58,6 +58,8 @@ Error executeObjcopyOnBinary(const CopyC
     return createFileError(Config.InputFilename, std::move(E));
 
   MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out);
+  if (auto E = Writer.finalize())
+    return E;
   return Writer.write();
 }
 

Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp?rev=362863&r1=362862&r2=362863&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp Fri Jun  7 18:22:54 2019
@@ -30,8 +30,11 @@ void MachOReader::readHeader(Object &O)
 template <typename SectionType>
 Section constructSectionCommon(SectionType Sec) {
   Section S;
-  memcpy(S.Sectname, Sec.sectname, sizeof(Sec.sectname));
-  memcpy(S.Segname, Sec.segname, sizeof(Sec.segname));
+  S.Sectname =
+      StringRef(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)))
+          .str();
+  S.Segname =
+      StringRef(Sec.segname, strnlen(Sec.segname, sizeof(Sec.sectname))).str();
   S.Addr = Sec.addr;
   S.Size = Sec.size;
   S.Offset = Sec.offset;
@@ -79,7 +82,6 @@ extractSections(const object::MachOObjec
 
     Section &S = Sections.back();
 
-    StringRef SectName(S.Sectname);
     Expected<object::SectionRef> SecRef =
         MachOObj.getSection(NextSectionIndex++);
     if (!SecRef)

Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp?rev=362863&r1=362862&r2=362863&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp Fri Jun  7 18:22:54 2019
@@ -11,6 +11,8 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Object/MachO.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
 #include <memory>
 
 namespace llvm {
@@ -128,13 +130,35 @@ void MachOWriter::writeHeader() {
 
 void MachOWriter::writeLoadCommands() {
   uint8_t *Begin = B.getBufferStart() + headerSize();
-  MachO::macho_load_command MLC;
   for (const auto &LC : O.LoadCommands) {
+    // Construct a load command.
+    MachO::macho_load_command MLC = LC.MachOLoadCommand;
+    switch (MLC.load_command_data.cmd) {
+    case MachO::LC_SEGMENT:
+      if (IsLittleEndian != sys::IsLittleEndianHost)
+        MachO::swapStruct(MLC.segment_command_data);
+      memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command));
+      Begin += sizeof(MachO::segment_command);
+
+      for (const auto &Sec : LC.Sections)
+        writeSectionInLoadCommand<MachO::section>(Sec, Begin);
+      continue;
+    case MachO::LC_SEGMENT_64:
+      if (IsLittleEndian != sys::IsLittleEndianHost)
+        MachO::swapStruct(MLC.segment_command_64_data);
+      memcpy(Begin, &MLC.segment_command_64_data,
+             sizeof(MachO::segment_command_64));
+      Begin += sizeof(MachO::segment_command_64);
+
+      for (const auto &Sec : LC.Sections)
+        writeSectionInLoadCommand<MachO::section_64>(Sec, Begin);
+      continue;
+    }
+
 #define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
   case MachO::LCName:                                                          \
     assert(sizeof(MachO::LCStruct) + LC.Payload.size() ==                      \
-           LC.MachOLoadCommand.load_command_data.cmdsize);                     \
-    MLC = LC.MachOLoadCommand;                                                 \
+           MLC.load_command_data.cmdsize);                                     \
     if (IsLittleEndian != sys::IsLittleEndianHost)                             \
       MachO::swapStruct(MLC.LCStruct##_data);                                  \
     memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct));              \
@@ -143,11 +167,11 @@ void MachOWriter::writeLoadCommands() {
     Begin += LC.Payload.size();                                                \
     break;
 
-    switch (LC.MachOLoadCommand.load_command_data.cmd) {
+    // Copy the load command as it is.
+    switch (MLC.load_command_data.cmd) {
     default:
       assert(sizeof(MachO::load_command) + LC.Payload.size() ==
-             LC.MachOLoadCommand.load_command_data.cmdsize);
-      MLC = LC.MachOLoadCommand;
+             MLC.load_command_data.cmdsize);
       if (IsLittleEndian != sys::IsLittleEndianHost)
         MachO::swapStruct(MLC.load_command_data);
       memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command));
@@ -160,9 +184,37 @@ void MachOWriter::writeLoadCommands() {
   }
 }
 
+template <typename StructType>
+void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) {
+  StructType Temp;
+  assert(Sec.Segname.size() <= sizeof(Temp.segname) && "too long segment name");
+  assert(Sec.Sectname.size() <= sizeof(Temp.sectname) &&
+         "too long section name");
+  memset(&Temp, 0, sizeof(StructType));
+  memcpy(Temp.segname, Sec.Segname.data(), Sec.Segname.size());
+  memcpy(Temp.sectname, Sec.Sectname.data(), Sec.Sectname.size());
+  Temp.addr = Sec.Addr;
+  Temp.size = Sec.Size;
+  Temp.offset = Sec.Offset;
+  Temp.align = Sec.Align;
+  Temp.reloff = Sec.RelOff;
+  Temp.nreloc = Sec.NReloc;
+  Temp.flags = Sec.Flags;
+  Temp.reserved1 = Sec.Reserved1;
+  Temp.reserved2 = Sec.Reserved2;
+
+  if (IsLittleEndian != sys::IsLittleEndianHost)
+    MachO::swapStruct(Temp);
+  memcpy(Out, &Temp, sizeof(StructType));
+  Out += sizeof(StructType);
+}
+
 void MachOWriter::writeSections() {
   for (const auto &LC : O.LoadCommands)
     for (const auto &Sec : LC.Sections) {
+      if (Sec.isVirtualSection())
+        continue;
+
       assert(Sec.Offset && "Section offset can not be zero");
       assert((Sec.Size == Sec.Content.size()) && "Incorrect section size");
       memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(),
@@ -333,6 +385,184 @@ void MachOWriter::writeTail() {
     (this->*WriteOp.second)();
 }
 
+void MachOWriter::updateSizeOfCmds() {
+  auto Size = 0;
+  for (const auto &LC : O.LoadCommands) {
+    auto &MLC = LC.MachOLoadCommand;
+    auto cmd = MLC.load_command_data.cmd;
+
+    switch (cmd) {
+    case MachO::LC_SEGMENT:
+      Size += sizeof(MachO::segment_command) +
+              sizeof(MachO::section) * LC.Sections.size();
+      continue;
+    case MachO::LC_SEGMENT_64:
+      Size += sizeof(MachO::segment_command_64) +
+              sizeof(MachO::section_64) * LC.Sections.size();
+      continue;
+    }
+
+    switch (cmd) {
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
+  case MachO::LCName:                                                          \
+    Size += sizeof(MachO::LCStruct);                                           \
+    break;
+#include "llvm/BinaryFormat/MachO.def"
+#undef HANDLE_LOAD_COMMAND
+    }
+  }
+
+  O.Header.SizeOfCmds = Size;
+}
+
+// Updates the index and the number of local/external/undefined symbols. Here we
+// assume that MLC is an LC_DYSYMTAB and the nlist entries in the symbol table
+// are already sorted by those types.
+void MachOWriter::updateDySymTab(MachO::macho_load_command &MLC) {
+  uint32_t NumLocalSymbols = 0;
+  auto Iter = O.SymTable.NameList.begin();
+  auto End = O.SymTable.NameList.end();
+  for (; Iter != End; Iter++) {
+    if (Iter->n_type & (MachO::N_EXT | MachO::N_PEXT))
+      break;
+
+    NumLocalSymbols++;
+  }
+
+  uint32_t NumExtDefSymbols = 0;
+  for (; Iter != End; Iter++) {
+    if ((Iter->n_type & MachO::N_TYPE) == MachO::N_UNDF)
+      break;
+
+    NumExtDefSymbols++;
+  }
+
+  MLC.dysymtab_command_data.ilocalsym = 0;
+  MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
+  MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
+  MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
+  MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
+  MLC.dysymtab_command_data.nundefsym =
+      O.SymTable.NameList.size() - (NumLocalSymbols + NumExtDefSymbols);
+}
+
+// Recomputes and updates offset and size fields in load commands and sections
+// since they could be modified.
+Error MachOWriter::layout() {
+  auto SizeOfCmds = loadCommandsSize();
+  auto Offset = headerSize() + SizeOfCmds;
+  O.Header.NCmds = O.LoadCommands.size();
+  O.Header.SizeOfCmds = SizeOfCmds;
+
+  // Lay out sections.
+  for (auto &LC : O.LoadCommands) {
+    uint64_t FileOff = Offset;
+    uint64_t VMSize = 0;
+    uint64_t FileOffsetInSegment = 0;
+    for (auto &Sec : LC.Sections) {
+      if (!Sec.isVirtualSection()) {
+        auto FilePaddingSize =
+            OffsetToAlignment(FileOffsetInSegment, 1 << Sec.Align);
+        Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize;
+        Sec.Size = Sec.Content.size();
+        FileOffsetInSegment += FilePaddingSize + Sec.Size;
+      }
+
+      VMSize = std::max(VMSize, Sec.Addr + Sec.Size);
+    }
+
+    // TODO: Handle the __PAGEZERO segment.
+    auto &MLC = LC.MachOLoadCommand;
+    switch (MLC.load_command_data.cmd) {
+    case MachO::LC_SEGMENT:
+      MLC.segment_command_data.cmdsize =
+          sizeof(MachO::segment_command) +
+          sizeof(MachO::section) * LC.Sections.size();
+      MLC.segment_command_data.nsects = LC.Sections.size();
+      MLC.segment_command_data.fileoff = FileOff;
+      MLC.segment_command_data.vmsize = VMSize;
+      MLC.segment_command_data.filesize = FileOffsetInSegment;
+      break;
+    case MachO::LC_SEGMENT_64:
+      MLC.segment_command_64_data.cmdsize =
+          sizeof(MachO::segment_command_64) +
+          sizeof(MachO::section_64) * LC.Sections.size();
+      MLC.segment_command_64_data.nsects = LC.Sections.size();
+      MLC.segment_command_64_data.fileoff = FileOff;
+      MLC.segment_command_64_data.vmsize = VMSize;
+      MLC.segment_command_64_data.filesize = FileOffsetInSegment;
+      break;
+    }
+
+    Offset += FileOffsetInSegment;
+  }
+
+  // Lay out relocations.
+  for (auto &LC : O.LoadCommands)
+    for (auto &Sec : LC.Sections) {
+      Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset;
+      Sec.NReloc = Sec.Relocations.size();
+      Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc;
+    }
+
+  // Lay out tail stuff.
+  auto NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
+  for (auto &LC : O.LoadCommands) {
+    auto &MLC = LC.MachOLoadCommand;
+    auto cmd = MLC.load_command_data.cmd;
+    switch (cmd) {
+    case MachO::LC_SYMTAB:
+      MLC.symtab_command_data.symoff = Offset;
+      MLC.symtab_command_data.nsyms = O.SymTable.NameList.size();
+      Offset += NListSize * MLC.symtab_command_data.nsyms;
+      MLC.symtab_command_data.stroff = Offset;
+      Offset += MLC.symtab_command_data.strsize;
+      break;
+    case MachO::LC_DYSYMTAB: {
+      if (MLC.dysymtab_command_data.ntoc != 0 ||
+          MLC.dysymtab_command_data.nmodtab != 0 ||
+          MLC.dysymtab_command_data.nextrefsyms != 0 ||
+          MLC.dysymtab_command_data.nlocrel != 0 ||
+          MLC.dysymtab_command_data.nextrel != 0)
+        return createStringError(llvm::errc::not_supported,
+                                 "shared library is not yet supported");
+
+      if (MLC.dysymtab_command_data.nindirectsyms != 0)
+        return createStringError(llvm::errc::not_supported,
+                                 "indirect symbol table is not yet supported");
+
+      updateDySymTab(MLC);
+      break;
+    }
+    case MachO::LC_SEGMENT:
+    case MachO::LC_SEGMENT_64:
+    case MachO::LC_VERSION_MIN_MACOSX:
+    case MachO::LC_BUILD_VERSION:
+    case MachO::LC_ID_DYLIB:
+    case MachO::LC_LOAD_DYLIB:
+    case MachO::LC_UUID:
+    case MachO::LC_SOURCE_VERSION:
+      // Nothing to update.
+      break;
+    default:
+      // Abort if it's unsupported in order to prevent corrupting the object.
+      return createStringError(llvm::errc::not_supported,
+                               "unsupported load command (cmd=0x%x)", cmd);
+    }
+  }
+
+  return Error::success();
+}
+
+Error MachOWriter::finalize() {
+  updateSizeOfCmds();
+
+  if (auto E = layout())
+    return E;
+
+  return Error::success();
+}
+
 Error MachOWriter::write() {
   if (Error E = B.allocate(totalSize()))
     return E;

Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h?rev=362863&r1=362862&r2=362863&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h Fri Jun  7 18:22:54 2019
@@ -29,8 +29,14 @@ class MachOWriter {
   size_t symTableSize() const;
   size_t strTableSize() const;
 
+  void updateDySymTab(MachO::macho_load_command &MLC);
+  void updateSizeOfCmds();
+  Error layout();
+
   void writeHeader();
   void writeLoadCommands();
+  template <typename StructType>
+  void writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out);
   void writeSections();
   void writeSymbolTable();
   void writeStringTable();
@@ -46,6 +52,7 @@ public:
       : O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), B(B) {}
 
   size_t totalSize() const;
+  Error finalize();
   Error write();
 };
 

Modified: llvm/trunk/tools/llvm-objcopy/MachO/Object.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/Object.h?rev=362863&r1=362862&r2=362863&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/Object.h (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/Object.h Fri Jun  7 18:22:54 2019
@@ -34,8 +34,8 @@ struct MachHeader {
 };
 
 struct Section {
-  char Sectname[16];
-  char Segname[16];
+  std::string Sectname;
+  std::string Segname;
   uint64_t Addr;
   uint64_t Size;
   uint32_t Offset;
@@ -49,6 +49,16 @@ struct Section {
 
   StringRef Content;
   std::vector<MachO::any_relocation_info> Relocations;
+
+  MachO::SectionType getType() const {
+    return static_cast<MachO::SectionType>(Flags & MachO::SECTION_TYPE);
+  }
+
+  bool isVirtualSection() const {
+    return (getType() == MachO::S_ZEROFILL ||
+            getType() == MachO::S_GB_ZEROFILL ||
+            getType() == MachO::S_THREAD_LOCAL_ZEROFILL);
+  }
 };
 
 struct LoadCommand {




More information about the llvm-commits mailing list