[llvm] r364000 - [llvm-objcopy][MachO] Rebuild the symbol/string table in the writer

Seiya Nuta via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 20 17:21:50 PDT 2019


Author: seiya
Date: Thu Jun 20 17:21:50 2019
New Revision: 364000

URL: http://llvm.org/viewvc/llvm-project?rev=364000&view=rev
Log:
[llvm-objcopy][MachO] Rebuild the symbol/string table in the writer

Summary: Build the string table using StringTableBuilder, reassign symbol indices, and update symbol indices in relocations to allow adding/modifying/removing symbols from the object.

Reviewers: alexshap, rupprecht, jhenderson

Reviewed By: alexshap

Subscribers: mgorny, jakehehrlich, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63309

Added:
    llvm/trunk/tools/llvm-objcopy/MachO/Object.cpp
Modified:
    llvm/trunk/tools/llvm-objcopy/CMakeLists.txt
    llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp
    llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.h
    llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp
    llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h
    llvm/trunk/tools/llvm-objcopy/MachO/Object.h

Modified: llvm/trunk/tools/llvm-objcopy/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/CMakeLists.txt?rev=364000&r1=363999&r2=364000&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/CMakeLists.txt (original)
+++ llvm/trunk/tools/llvm-objcopy/CMakeLists.txt Thu Jun 20 17:21:50 2019
@@ -26,6 +26,7 @@ add_llvm_tool(llvm-objcopy
   MachO/MachOObjcopy.cpp
   MachO/MachOReader.cpp
   MachO/MachOWriter.cpp
+  MachO/Object.cpp
   DEPENDS
   ObjcopyOptsTableGen
   StripOptsTableGen

Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp?rev=364000&r1=363999&r2=364000&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp Thu Jun 20 17:21:50 2019
@@ -97,8 +97,16 @@ extractSections(const object::MachOObjec
     S.Relocations.reserve(S.NReloc);
     for (auto RI = MachOObj.section_rel_begin(SecRef->getRawDataRefImpl()),
               RE = MachOObj.section_rel_end(SecRef->getRawDataRefImpl());
-         RI != RE; ++RI)
-      S.Relocations.push_back(MachOObj.getRelocation(RI->getRawDataRefImpl()));
+         RI != RE; ++RI) {
+      RelocationInfo R;
+      R.Symbol = nullptr; // We'll fill this field later.
+      R.Info = MachOObj.getRelocation(RI->getRawDataRefImpl());
+      R.Scattered =
+          reinterpret_cast<MachO::scattered_relocation_info *>(&R.Info)
+              ->r_scattered;
+      S.Relocations.push_back(R);
+    }
+
     assert(S.NReloc == S.Relocations.size() &&
            "Incorrect number of relocations");
   }
@@ -157,35 +165,43 @@ void MachOReader::readLoadCommands(Objec
   }
 }
 
-template <typename nlist_t> NListEntry constructNameList(const nlist_t &nlist) {
-  NListEntry NL;
-  NL.n_strx = nlist.n_strx;
-  NL.n_type = nlist.n_type;
-  NL.n_sect = nlist.n_sect;
-  NL.n_desc = nlist.n_desc;
-  NL.n_value = nlist.n_value;
-  return NL;
+template <typename nlist_t>
+SymbolEntry constructSymbolEntry(StringRef StrTable, const nlist_t &nlist) {
+  assert(nlist.n_strx < StrTable.size() &&
+         "n_strx exceeds the size of the string table");
+  SymbolEntry SE;
+  SE.Name = StringRef(StrTable.data() + nlist.n_strx).str();
+  SE.n_type = nlist.n_type;
+  SE.n_sect = nlist.n_sect;
+  SE.n_desc = nlist.n_desc;
+  SE.n_value = nlist.n_value;
+  return SE;
 }
 
 void MachOReader::readSymbolTable(Object &O) const {
+  StringRef StrTable = MachOObj.getStringTableData();
   for (auto Symbol : MachOObj.symbols()) {
-    NListEntry NLE =
-        MachOObj.is64Bit()
-            ? constructNameList<MachO::nlist_64>(
-                  MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
-            : constructNameList<MachO::nlist>(
-                  MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl()));
-    O.SymTable.NameList.push_back(NLE);
+    SymbolEntry SE =
+        (MachOObj.is64Bit()
+             ? constructSymbolEntry(
+                   StrTable,
+                   MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
+             : constructSymbolEntry(
+                   StrTable,
+                   MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl())));
+
+    O.SymTable.Symbols.push_back(llvm::make_unique<SymbolEntry>(SE));
   }
 }
 
-void MachOReader::readStringTable(Object &O) const {
-  StringRef Data = MachOObj.getStringTableData();
-  SmallVector<StringRef, 10> Strs;
-  Data.split(Strs, '\0');
-  O.StrTable.Strings.reserve(Strs.size());
-  for (auto S : Strs)
-    O.StrTable.Strings.push_back(S.str());
+void MachOReader::setSymbolInRelocationInfo(Object &O) const {
+  for (auto &LC : O.LoadCommands)
+    for (auto &Sec : LC.Sections)
+      for (auto &Reloc : Sec.Relocations)
+        if (!Reloc.Scattered) {
+          auto *Info = reinterpret_cast<MachO::relocation_info *>(&Reloc.Info);
+          Reloc.Symbol = O.SymTable.getSymbolByIndex(Info->r_symbolnum);
+        }
 }
 
 void MachOReader::readRebaseInfo(Object &O) const {
@@ -213,7 +229,7 @@ std::unique_ptr<Object> MachOReader::cre
   readHeader(*Obj);
   readLoadCommands(*Obj);
   readSymbolTable(*Obj);
-  readStringTable(*Obj);
+  setSymbolInRelocationInfo(*Obj);
   readRebaseInfo(*Obj);
   readBindInfo(*Obj);
   readWeakBindInfo(*Obj);

Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.h?rev=364000&r1=363999&r2=364000&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.h (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.h Thu Jun 20 17:21:50 2019
@@ -30,7 +30,7 @@ class MachOReader : public Reader {
   void readHeader(Object &O) const;
   void readLoadCommands(Object &O) const;
   void readSymbolTable(Object &O) const;
-  void readStringTable(Object &O) const;
+  void setSymbolInRelocationInfo(Object &O) const;
   void readRebaseInfo(Object &O) const;
   void readBindInfo(Object &O) const;
   void readWeakBindInfo(Object &O) const;

Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp?rev=364000&r1=363999&r2=364000&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp Thu Jun 20 17:21:50 2019
@@ -26,18 +26,10 @@ size_t MachOWriter::headerSize() const {
 size_t MachOWriter::loadCommandsSize() const { return O.Header.SizeOfCmds; }
 
 size_t MachOWriter::symTableSize() const {
-  return O.SymTable.NameList.size() *
+  return O.SymTable.Symbols.size() *
          (Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist));
 }
 
-size_t MachOWriter::strTableSize() const {
-  size_t S = 0;
-  for (const auto &Str : O.StrTable.Strings)
-    S += Str.size();
-  S += (O.StrTable.Strings.empty() ? 0 : O.StrTable.Strings.size() - 1);
-  return S;
-}
-
 size_t MachOWriter::totalSize() const {
   // Going from tail to head and looking for an appropriate "anchor" to
   // calculate the total size assuming that all the offsets are either valid
@@ -49,12 +41,12 @@ size_t MachOWriter::totalSize() const {
         O.LoadCommands[*O.SymTabCommandIndex]
             .MachOLoadCommand.symtab_command_data;
     if (SymTabCommand.symoff) {
-      assert((SymTabCommand.nsyms == O.SymTable.NameList.size()) &&
+      assert((SymTabCommand.nsyms == O.SymTable.Symbols.size()) &&
              "Incorrect number of symbols");
       Ends.push_back(SymTabCommand.symoff + symTableSize());
     }
     if (SymTabCommand.stroff) {
-      assert((SymTabCommand.strsize == strTableSize()) &&
+      assert((SymTabCommand.strsize == StrTableBuilder.getSize()) &&
              "Incorrect string table size");
       Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize);
     }
@@ -128,6 +120,14 @@ void MachOWriter::writeHeader() {
   memcpy(B.getBufferStart(), &Header, HeaderSize);
 }
 
+void MachOWriter::updateSymbolIndexes() {
+  uint32_t Index = 0;
+  for (auto &Symbol : O.SymTable.Symbols) {
+    Symbol->Index = Index;
+    Index++;
+  }
+}
+
 void MachOWriter::writeLoadCommands() {
   uint8_t *Begin = B.getBufferStart() + headerSize();
   for (const auto &LC : O.LoadCommands) {
@@ -220,24 +220,32 @@ void MachOWriter::writeSections() {
       memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(),
              Sec.Content.size());
       for (size_t Index = 0; Index < Sec.Relocations.size(); ++Index) {
-        MachO::any_relocation_info R = Sec.Relocations[Index];
+        auto RelocInfo = Sec.Relocations[Index];
+        if (!RelocInfo.Scattered) {
+          auto *Info =
+              reinterpret_cast<MachO::relocation_info *>(&RelocInfo.Info);
+          Info->r_symbolnum = RelocInfo.Symbol->Index;
+        }
+
         if (IsLittleEndian != sys::IsLittleEndianHost)
-          MachO::swapStruct(R);
+          MachO::swapStruct(
+              reinterpret_cast<MachO::any_relocation_info &>(RelocInfo.Info));
         memcpy(B.getBufferStart() + Sec.RelOff +
                    Index * sizeof(MachO::any_relocation_info),
-               &R, sizeof(R));
+               &RelocInfo.Info, sizeof(RelocInfo.Info));
       }
     }
 }
 
 template <typename NListType>
-void writeNListEntry(const NListEntry &NLE, bool IsLittleEndian, char *&Out) {
+void writeNListEntry(const SymbolEntry &SE, bool IsLittleEndian, char *&Out,
+                     uint32_t Nstrx) {
   NListType ListEntry;
-  ListEntry.n_strx = NLE.n_strx;
-  ListEntry.n_type = NLE.n_type;
-  ListEntry.n_sect = NLE.n_sect;
-  ListEntry.n_desc = NLE.n_desc;
-  ListEntry.n_value = NLE.n_value;
+  ListEntry.n_strx = Nstrx;
+  ListEntry.n_type = SE.n_type;
+  ListEntry.n_sect = SE.n_sect;
+  ListEntry.n_desc = SE.n_desc;
+  ListEntry.n_value = SE.n_value;
 
   if (IsLittleEndian != sys::IsLittleEndianHost)
     MachO::swapStruct(ListEntry);
@@ -251,15 +259,9 @@ void MachOWriter::writeSymbolTable() {
   const MachO::symtab_command &SymTabCommand =
       O.LoadCommands[*O.SymTabCommandIndex]
           .MachOLoadCommand.symtab_command_data;
-  assert((SymTabCommand.nsyms == O.SymTable.NameList.size()) &&
-         "Incorrect number of symbols");
-  char *Out = (char *)B.getBufferStart() + SymTabCommand.symoff;
-  for (auto NLE : O.SymTable.NameList) {
-    if (Is64Bit)
-      writeNListEntry<MachO::nlist_64>(NLE, IsLittleEndian, Out);
-    else
-      writeNListEntry<MachO::nlist>(NLE, IsLittleEndian, Out);
-  }
+
+  uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff;
+  StrTableBuilder.write(StrTable);
 }
 
 void MachOWriter::writeStringTable() {
@@ -268,17 +270,17 @@ void MachOWriter::writeStringTable() {
   const MachO::symtab_command &SymTabCommand =
       O.LoadCommands[*O.SymTabCommandIndex]
           .MachOLoadCommand.symtab_command_data;
-  char *Out = (char *)B.getBufferStart() + SymTabCommand.stroff;
-  assert((SymTabCommand.strsize == strTableSize()) &&
-         "Incorrect string table size");
-  for (size_t Index = 0; Index < O.StrTable.Strings.size(); ++Index) {
-    memcpy(Out, O.StrTable.Strings[Index].data(),
-           O.StrTable.Strings[Index].size());
-    Out += O.StrTable.Strings[Index].size();
-    if (Index + 1 != O.StrTable.Strings.size()) {
-      memcpy(Out, "\0", 1);
-      Out += 1;
-    }
+
+  char *SymTable = (char *)B.getBufferStart() + SymTabCommand.symoff;
+  for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end();
+       Iter != End; Iter++) {
+    SymbolEntry *Sym = Iter->get();
+    auto Nstrx = StrTableBuilder.getOffset(Sym->Name);
+
+    if (Is64Bit)
+      writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx);
+    else
+      writeNListEntry<MachO::nlist>(*Sym, IsLittleEndian, SymTable, Nstrx);
   }
 }
 
@@ -420,10 +422,10 @@ void MachOWriter::updateSizeOfCmds() {
 // are already sorted by the those types.
 void MachOWriter::updateDySymTab(MachO::macho_load_command &MLC) {
   uint32_t NumLocalSymbols = 0;
-  auto Iter = O.SymTable.NameList.begin();
-  auto End = O.SymTable.NameList.end();
+  auto Iter = O.SymTable.Symbols.begin();
+  auto End = O.SymTable.Symbols.end();
   for (; Iter != End; Iter++) {
-    if (Iter->n_type & (MachO::N_EXT | MachO::N_PEXT))
+    if ((*Iter)->n_type & (MachO::N_EXT | MachO::N_PEXT))
       break;
 
     NumLocalSymbols++;
@@ -431,7 +433,7 @@ void MachOWriter::updateDySymTab(MachO::
 
   uint32_t NumExtDefSymbols = 0;
   for (; Iter != End; Iter++) {
-    if ((Iter->n_type & MachO::N_TYPE) == MachO::N_UNDF)
+    if (((*Iter)->n_type & MachO::N_TYPE) == MachO::N_UNDF)
       break;
 
     NumExtDefSymbols++;
@@ -443,7 +445,7 @@ void MachOWriter::updateDySymTab(MachO::
   MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
   MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
   MLC.dysymtab_command_data.nundefsym =
-      O.SymTable.NameList.size() - (NumLocalSymbols + NumExtDefSymbols);
+      O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
 }
 
 // Recomputes and updates offset and size fields in load commands and sections
@@ -512,8 +514,9 @@ Error MachOWriter::layout() {
     auto cmd = MLC.load_command_data.cmd;
     switch (cmd) {
     case MachO::LC_SYMTAB:
+      MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
+      MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
       MLC.symtab_command_data.symoff = Offset;
-      MLC.symtab_command_data.nsyms = O.SymTable.NameList.size();
       Offset += NListSize * MLC.symtab_command_data.nsyms;
       MLC.symtab_command_data.stroff = Offset;
       Offset += MLC.symtab_command_data.strsize;
@@ -554,8 +557,15 @@ Error MachOWriter::layout() {
   return Error::success();
 }
 
+void MachOWriter::constructStringTable() {
+  for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
+    StrTableBuilder.add(Sym->Name);
+  StrTableBuilder.finalize();
+}
+
 Error MachOWriter::finalize() {
   updateSizeOfCmds();
+  constructStringTable();
 
   if (auto E = layout())
     return E;
@@ -568,6 +578,7 @@ Error MachOWriter::write() {
     return E;
   memset(B.getBufferStart(), 0, totalSize());
   writeHeader();
+  updateSymbolIndexes();
   writeLoadCommands();
   writeSections();
   writeTail();

Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h?rev=364000&r1=363999&r2=364000&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h Thu Jun 20 17:21:50 2019
@@ -23,6 +23,7 @@ class MachOWriter {
   bool Is64Bit;
   bool IsLittleEndian;
   Buffer &B;
+  StringTableBuilder StrTableBuilder{StringTableBuilder::MachO};
 
   size_t headerSize() const;
   size_t loadCommandsSize() const;
@@ -31,6 +32,8 @@ class MachOWriter {
 
   void updateDySymTab(MachO::macho_load_command &MLC);
   void updateSizeOfCmds();
+  void updateSymbolIndexes();
+  void constructStringTable();
   Error layout();
 
   void writeHeader();

Added: llvm/trunk/tools/llvm-objcopy/MachO/Object.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/Object.cpp?rev=364000&view=auto
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/Object.cpp (added)
+++ llvm/trunk/tools/llvm-objcopy/MachO/Object.cpp Thu Jun 20 17:21:50 2019
@@ -0,0 +1,15 @@
+#include "Object.h"
+#include "../llvm-objcopy.h"
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+const SymbolEntry *SymbolTable::getSymbolByIndex(uint32_t Index) const {
+  assert(Index < Symbols.size() && "invalid symbol index");
+  return Symbols[Index].get();
+}
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm

Modified: llvm/trunk/tools/llvm-objcopy/MachO/Object.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/Object.h?rev=364000&r1=363999&r2=364000&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/Object.h (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/Object.h Thu Jun 20 17:21:50 2019
@@ -12,6 +12,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/MachO.h"
+#include "llvm/MC/StringTableBuilder.h"
 #include "llvm/ObjectYAML/DWARFYAML.h"
 #include "llvm/Support/YAMLTraits.h"
 #include <cstdint>
@@ -33,6 +34,7 @@ struct MachHeader {
   uint32_t Reserved = 0;
 };
 
+struct RelocationInfo;
 struct Section {
   std::string Sectname;
   std::string Segname;
@@ -48,7 +50,7 @@ struct Section {
   uint32_t Reserved3;
 
   StringRef Content;
-  std::vector<MachO::any_relocation_info> Relocations;
+  std::vector<RelocationInfo> Relocations;
 
   MachO::SectionType getType() const {
     return static_cast<MachO::SectionType>(Flags & MachO::SECTION_TYPE);
@@ -79,8 +81,11 @@ struct LoadCommand {
   std::vector<Section> Sections;
 };
 
-struct NListEntry {
-  uint32_t n_strx;
+// Information about a symbol. Fields that start with "n_" are the same as the
+// corresponding fields in nlist.
+struct SymbolEntry {
+  std::string Name;
+  uint32_t Index;
   uint8_t n_type;
   uint8_t n_sect;
   uint16_t n_desc;
@@ -90,7 +95,9 @@ struct NListEntry {
 /// The location of the symbol table inside the binary is described by LC_SYMTAB
 /// load command.
 struct SymbolTable {
-  std::vector<NListEntry> NameList;
+  std::vector<std::unique_ptr<SymbolEntry>> Symbols;
+
+  const SymbolEntry *getSymbolByIndex(uint32_t Index) const;
 };
 
 /// The location of the string table inside the binary is described by LC_SYMTAB
@@ -99,6 +106,13 @@ struct StringTable {
   std::vector<std::string> Strings;
 };
 
+struct RelocationInfo {
+  const SymbolEntry *Symbol;
+  // True if Info is a scattered_relocation_info.
+  bool Scattered;
+  MachO::any_relocation_info Info;
+};
+
 /// The location of the rebase info inside the binary is described by
 /// LC_DYLD_INFO load command. Dyld rebases an image whenever dyld loads it at
 /// an address different from its preferred address.  The rebase information is




More information about the llvm-commits mailing list