[llvm] f34fdbc - [llvm-objcopy][MachO] Make --remove-section clean up dead symbols

Alexander Shaposhnikov via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 22 14:27:10 PDT 2020


Author: Alexander Shaposhnikov
Date: 2020-04-22T14:26:42-07:00
New Revision: f34fdbcf996a9b944439007b1da087be8284b803

URL: https://github.com/llvm/llvm-project/commit/f34fdbcf996a9b944439007b1da087be8284b803
DIFF: https://github.com/llvm/llvm-project/commit/f34fdbcf996a9b944439007b1da087be8284b803.diff

LOG: [llvm-objcopy][MachO] Make --remove-section clean up dead symbols

Make --remove-section clean up dead symbols, return an Error if it can't be safely done.

Test plan: make check-all

Differential revision: https://reviews.llvm.org/D78474

Added: 
    llvm/test/tools/llvm-objcopy/MachO/remove-section-dead-symbols.test
    llvm/test/tools/llvm-objcopy/MachO/remove-section-error.test

Modified: 
    llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
    llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
    llvm/tools/llvm-objcopy/MachO/Object.cpp
    llvm/tools/llvm-objcopy/MachO/Object.h

Removed: 
    


################################################################################
diff  --git a/llvm/test/tools/llvm-objcopy/MachO/remove-section-dead-symbols.test b/llvm/test/tools/llvm-objcopy/MachO/remove-section-dead-symbols.test
new file mode 100644
index 000000000000..474369439ba6
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/MachO/remove-section-dead-symbols.test
@@ -0,0 +1,128 @@
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-objcopy --remove-section __DATA,C %t %t.copy
+
+# RUN: llvm-readobj --symbols %t.copy | FileCheck %s
+
+# CHECK: Symbols [
+# CHECK-NEXT:   Symbol {
+# CHECK-NEXT:     Name: _A (1)
+# CHECK-NEXT:     Extern
+# CHECK-NEXT:     Type: Section (0xE)
+# CHECK-NEXT:     Section: __data (0x2)
+# CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+# CHECK-NEXT:     Flags [ (0x0)
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Value: 0x0
+# CHECK-NEXT:   }
+# CHECK-NEXT: ]
+
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x01000007
+  cpusubtype:      0x00000003
+  filetype:        0x00000001
+  ncmds:           4
+  sizeofcmds:      432
+  flags:           0x00002000
+  reserved:        0x00000000
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         312
+    segname:         ''
+    vmaddr:          0
+    vmsize:          8
+    fileoff:         464
+    filesize:        8
+    maxprot:         7
+    initprot:        7
+    nsects:          3
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x0000000000000000
+        size:            0
+        offset:          0x000001D0
+        align:           0
+        reloff:          0x00000000
+        nreloc:          0
+        flags:           0x80000000
+        reserved1:       0x00000000
+        reserved2:       0x00000000
+        reserved3:       0x00000000
+        content:         ''
+      - sectname:        __data
+        segname:         __DATA
+        addr:            0x0000000000000000
+        size:            4
+        offset:          0x000001D0
+        align:           2
+        reloff:          0x00000000
+        nreloc:          0
+        flags:           0x00000000
+        reserved1:       0x00000000
+        reserved2:       0x00000000
+        reserved3:       0x00000000
+        content:         '01000000'
+      - sectname:        C
+        segname:         __DATA
+        addr:            0x0000000000000004
+        size:            4
+        offset:          0x000001D4
+        align:           2
+        reloff:          0x00000000
+        nreloc:          0
+        flags:           0x00000000
+        reserved1:       0x00000000
+        reserved2:       0x00000000
+        reserved3:       0x00000000
+        content:         '02000000'
+  - cmd:             LC_VERSION_MIN_MACOSX
+    cmdsize:         16
+    version:         658944
+    sdk:             0
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          472
+    nsyms:           2
+    stroff:          504
+    strsize:         8
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       0
+    iextdefsym:      0
+    nextdefsym:      2
+    iundefsym:       2
+    nundefsym:       0
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  0
+    nindirectsyms:   0
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+LinkEditData:
+  NameList:
+    - n_strx:          4
+      n_type:          0x0F
+      n_sect:          2
+      n_desc:          0
+      n_value:         0
+    - n_strx:          1
+      n_type:          0x0F
+      n_sect:          3
+      n_desc:          0
+      n_value:         4
+  StringTable:
+    - ''
+    - _B
+    - _A
+    - ''
+...

diff  --git a/llvm/test/tools/llvm-objcopy/MachO/remove-section-error.test b/llvm/test/tools/llvm-objcopy/MachO/remove-section-error.test
new file mode 100644
index 000000000000..e060212fb2da
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/MachO/remove-section-error.test
@@ -0,0 +1,120 @@
+# RUN: yaml2obj %s -o %t
+# RUN: not llvm-objcopy --remove-section __DATA,C %t /dev/null 2>&1 | FileCheck %s
+
+# CHECK: symbol '_a' defined in section with index '2' cannot be removed because it is referenced by a relocation in section '__TEXT,__text'
+
+## The binary used in this test was built as follows: 
+## main.c: 
+##   __attribute__((section("__DATA,C"))) int a = 2;
+##   int f() { return a; }
+## build command:
+## clang -fno-exceptions -fno-unwind-tables -c main.c -o main.o
+
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x01000007
+  cpusubtype:      0x00000003
+  filetype:        0x00000001
+  ncmds:           4
+  sizeofcmds:      360
+  flags:           0x00002000
+  reserved:        0x00000000
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         232
+    segname:         ''
+    vmaddr:          0
+    vmsize:          16
+    fileoff:         392
+    filesize:        16
+    maxprot:         7
+    initprot:        7
+    nsects:          2
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x0000000000000000
+        size:            12
+        offset:          0x00000188
+        align:           4
+        reloff:          0x00000198
+        nreloc:          1
+        flags:           0x80000400
+        reserved1:       0x00000000
+        reserved2:       0x00000000
+        reserved3:       0x00000000
+        content:         554889E58B05000000005DC3
+        relocations:
+          - address:         0x00000006
+            symbolnum:       0
+            pcrel:           true
+            length:          2
+            extern:          true
+            type:            1
+            scattered:       false
+            value:           0
+      - sectname:        C
+        segname:         __DATA
+        addr:            0x000000000000000C
+        size:            4
+        offset:          0x00000194
+        align:           2
+        reloff:          0x00000000
+        nreloc:          0
+        flags:           0x00000000
+        reserved1:       0x00000000
+        reserved2:       0x00000000
+        reserved3:       0x00000000
+        content:         '02000000'
+  - cmd:             LC_BUILD_VERSION
+    cmdsize:         24
+    platform:        1
+    minos:           658944
+    sdk:             658944
+    ntools:          0
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          416
+    nsyms:           2
+    stroff:          448
+    strsize:         8
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       0
+    iextdefsym:      0
+    nextdefsym:      2
+    iundefsym:       2
+    nundefsym:       0
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  0
+    nindirectsyms:   0
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+LinkEditData:
+  NameList:
+    - n_strx:          4
+      n_type:          0x0F
+      n_sect:          2
+      n_desc:          0
+      n_value:         12
+    - n_strx:          1
+      n_type:          0x0F
+      n_sect:          1
+      n_desc:          0
+      n_value:         0
+  StringTable:
+    - ''
+    - _f
+    - _a
+    - ''
+...

diff  --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
index 040a3710b92c..c2743748fa13 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -20,7 +20,7 @@ namespace macho {
 using namespace object;
 using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>;
 
-static void removeSections(const CopyConfig &Config, Object &Obj) {
+static Error removeSections(const CopyConfig &Config, Object &Obj) {
   SectionPred RemovePred = [](const std::unique_ptr<Section> &) { return false; };
 
   if (!Config.ToRemove.empty()) {
@@ -180,7 +180,9 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
     return createStringError(llvm::errc::invalid_argument,
                              "option not supported by llvm-objcopy for MachO");
   }
-  removeSections(Config, Obj);
+
+  if (Error E = removeSections(Config, Obj))
+    return E;
 
   // Mark symbols to determine which symbols are still needed.
   if (Config.StripAll)

diff  --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
index 6d08e3abccf5..b4662ec5b5cd 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
@@ -28,10 +28,11 @@ void MachOReader::readHeader(Object &O) const {
 }
 
 template <typename SectionType>
-Section constructSectionCommon(SectionType Sec) {
+Section constructSectionCommon(SectionType Sec, uint32_t Index) {
   StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));
   StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));
   Section S(SegName, SectName);
+  S.Index = Index;
   S.Addr = Sec.addr;
   S.Size = Sec.size;
   S.Offset = Sec.offset;
@@ -45,14 +46,15 @@ Section constructSectionCommon(SectionType Sec) {
   return S;
 }
 
-template <typename SectionType> Section constructSection(SectionType Sec);
+template <typename SectionType>
+Section constructSection(SectionType Sec, uint32_t Index);
 
-template <> Section constructSection(MachO::section Sec) {
-  return constructSectionCommon(Sec);
+template <> Section constructSection(MachO::section Sec, uint32_t Index) {
+  return constructSectionCommon(Sec, Index);
 }
 
-template <> Section constructSection(MachO::section_64 Sec) {
-  Section S = constructSectionCommon(Sec);
+template <> Section constructSection(MachO::section_64 Sec, uint32_t Index) {
+  Section S = constructSectionCommon(Sec, Index);
   S.Reserved3 = Sec.reserved3;
   return S;
 }
@@ -62,7 +64,7 @@ template <typename SectionType, typename SegmentType>
 std::vector<std::unique_ptr<Section>>
 extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
                 const object::MachOObjectFile &MachOObj,
-                size_t &NextSectionIndex) {
+                uint32_t &NextSectionIndex) {
   auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
   const SectionType *Curr =
       reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
@@ -72,9 +74,11 @@ extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
       SectionType Sec;
       memcpy((void *)&Sec, Curr, sizeof(SectionType));
       MachO::swapStruct(Sec);
-      Sections.push_back(std::make_unique<Section>(constructSection(Sec)));
+      Sections.push_back(
+          std::make_unique<Section>(constructSection(Sec, NextSectionIndex)));
     } else {
-      Sections.push_back(std::make_unique<Section>(constructSection(*Curr)));
+      Sections.push_back(
+          std::make_unique<Section>(constructSection(*Curr, NextSectionIndex)));
     }
 
     Section &S = *Sections.back();
@@ -110,7 +114,7 @@ extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
 
 void MachOReader::readLoadCommands(Object &O) const {
   // For MachO sections indices start from 1.
-  size_t NextSectionIndex = 1;
+  uint32_t NextSectionIndex = 1;
   for (auto LoadCmd : MachOObj.load_commands()) {
     LoadCommand LC;
     switch (LoadCmd.C.cmd) {
@@ -189,12 +193,10 @@ void MachOReader::readSymbolTable(Object &O) const {
   for (auto Symbol : MachOObj.symbols()) {
     SymbolEntry SE =
         (MachOObj.is64Bit()
-             ? constructSymbolEntry(
-                   StrTable,
-                   MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
-             : constructSymbolEntry(
-                   StrTable,
-                   MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl())));
+             ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry(
+                                                  Symbol.getRawDataRefImpl()))
+             : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry(
+                                                  Symbol.getRawDataRefImpl())));
 
     O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE));
   }

diff  --git a/llvm/tools/llvm-objcopy/MachO/Object.cpp b/llvm/tools/llvm-objcopy/MachO/Object.cpp
index 4e2b33d840bc..846098fb7729 100644
--- a/llvm/tools/llvm-objcopy/MachO/Object.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/Object.cpp
@@ -1,5 +1,7 @@
 #include "Object.h"
 #include "../llvm-objcopy.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <unordered_set>
 
 namespace llvm {
 namespace objcopy {
@@ -22,11 +24,41 @@ void SymbolTable::removeSymbols(
       std::end(Symbols));
 }
 
-void Object::removeSections(function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
-  for (LoadCommand &LC : LoadCommands)
-    LC.Sections.erase(std::remove_if(std::begin(LC.Sections),
-                                     std::end(LC.Sections), ToRemove),
-                      std::end(LC.Sections));
+Error Object::removeSections(
+    function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
+  std::unordered_set<uint32_t> RemovedSectionsIndices;
+  for (LoadCommand &LC : LoadCommands) {
+    auto It = std::stable_partition(
+        std::begin(LC.Sections), std::end(LC.Sections),
+        [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
+    for (auto I = It, End = LC.Sections.end(); I != End; ++I)
+      RemovedSectionsIndices.insert((*I)->Index);
+    LC.Sections.erase(It, LC.Sections.end());
+  }
+
+  auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
+    Optional<uint32_t> Section = S->section();
+    return (Section && RemovedSectionsIndices.count(*Section));
+  };
+
+  SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
+  for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
+    if (IsDead(Sym))
+      DeadSymbols.insert(Sym.get());
+
+  for (const LoadCommand &LC : LoadCommands)
+    for (const std::unique_ptr<Section> &Sec : LC.Sections)
+      for (const RelocationInfo &R : Sec->Relocations)
+        if (R.Symbol && DeadSymbols.count(R.Symbol))
+          return createStringError(std::errc::invalid_argument,
+                                   "symbol '%s' defined in section with index "
+                                   "'%u' cannot be removed because it is "
+                                   "referenced by a relocation in section '%s'",
+                                   R.Symbol->Name.c_str(),
+                                   *(R.Symbol->section()),
+                                   Sec->CanonicalName.c_str());
+  SymTable.removeSymbols(IsDead);
+  return Error::success();
 }
 
 void Object::addLoadCommand(LoadCommand LC) {

diff  --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h
index d0d4554d7560..dc828e66fcbd 100644
--- a/llvm/tools/llvm-objcopy/MachO/Object.h
+++ b/llvm/tools/llvm-objcopy/MachO/Object.h
@@ -37,6 +37,7 @@ struct MachHeader {
 
 struct RelocationInfo;
 struct Section {
+  uint32_t Index;
   std::string Segname;
   std::string Sectname;
   // CanonicalName is a string formatted as “<Segname>,<Sectname>".
@@ -83,7 +84,7 @@ struct LoadCommand {
   // The raw content of the payload of the load command (located right after the
   // corresponding struct). In some cases it is either empty or can be
   // copied-over without digging into its structure.
-  std::vector<uint8_t> Payload; 
+  std::vector<uint8_t> Payload;
 
   // Some load commands can contain (inside the payload) an array of sections,
   // though the contents of the sections are stored separately. The struct
@@ -115,6 +116,10 @@ struct SymbolEntry {
   bool isUndefinedSymbol() const {
     return (n_type & MachO::N_TYPE) == MachO::N_UNDF;
   }
+
+  Optional<uint32_t> section() const {
+    return n_sect == MachO::NO_SECT ? None : Optional<uint32_t>(n_sect);
+  }
 };
 
 /// The location of the symbol table inside the binary is described by LC_SYMTAB
@@ -306,7 +311,8 @@ struct Object {
 
   Object() : NewSectionsContents(Alloc) {}
 
-  void removeSections(function_ref<bool(const std::unique_ptr<Section> &)> ToRemove);
+  Error
+  removeSections(function_ref<bool(const std::unique_ptr<Section> &)> ToRemove);
   void addLoadCommand(LoadCommand LC);
 
   /// Creates a new segment load command in the object and returns a reference


        


More information about the llvm-commits mailing list