[llvm] f34fdbc - [llvm-objcopy][MachO] Make --remove-section clean up dead symbols
Alexander Shaposhnikov via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 22 14:27:10 PDT 2020
Author: Alexander Shaposhnikov
Date: 2020-04-22T14:26:42-07:00
New Revision: f34fdbcf996a9b944439007b1da087be8284b803
URL: https://github.com/llvm/llvm-project/commit/f34fdbcf996a9b944439007b1da087be8284b803
DIFF: https://github.com/llvm/llvm-project/commit/f34fdbcf996a9b944439007b1da087be8284b803.diff
LOG: [llvm-objcopy][MachO] Make --remove-section clean up dead symbols
Make --remove-section clean up dead symbols, return an Error if it can't be safely done.
Test plan: make check-all
Differential revision: https://reviews.llvm.org/D78474
Added:
llvm/test/tools/llvm-objcopy/MachO/remove-section-dead-symbols.test
llvm/test/tools/llvm-objcopy/MachO/remove-section-error.test
Modified:
llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
llvm/tools/llvm-objcopy/MachO/Object.cpp
llvm/tools/llvm-objcopy/MachO/Object.h
Removed:
################################################################################
diff --git a/llvm/test/tools/llvm-objcopy/MachO/remove-section-dead-symbols.test b/llvm/test/tools/llvm-objcopy/MachO/remove-section-dead-symbols.test
new file mode 100644
index 000000000000..474369439ba6
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/MachO/remove-section-dead-symbols.test
@@ -0,0 +1,128 @@
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-objcopy --remove-section __DATA,C %t %t.copy
+
+# RUN: llvm-readobj --symbols %t.copy | FileCheck %s
+
+# CHECK: Symbols [
+# CHECK-NEXT: Symbol {
+# CHECK-NEXT: Name: _A (1)
+# CHECK-NEXT: Extern
+# CHECK-NEXT: Type: Section (0xE)
+# CHECK-NEXT: Section: __data (0x2)
+# CHECK-NEXT: RefType: UndefinedNonLazy (0x0)
+# CHECK-NEXT: Flags [ (0x0)
+# CHECK-NEXT: ]
+# CHECK-NEXT: Value: 0x0
+# CHECK-NEXT: }
+# CHECK-NEXT: ]
+
+--- !mach-o
+FileHeader:
+ magic: 0xFEEDFACF
+ cputype: 0x01000007
+ cpusubtype: 0x00000003
+ filetype: 0x00000001
+ ncmds: 4
+ sizeofcmds: 432
+ flags: 0x00002000
+ reserved: 0x00000000
+LoadCommands:
+ - cmd: LC_SEGMENT_64
+ cmdsize: 312
+ segname: ''
+ vmaddr: 0
+ vmsize: 8
+ fileoff: 464
+ filesize: 8
+ maxprot: 7
+ initprot: 7
+ nsects: 3
+ flags: 0
+ Sections:
+ - sectname: __text
+ segname: __TEXT
+ addr: 0x0000000000000000
+ size: 0
+ offset: 0x000001D0
+ align: 0
+ reloff: 0x00000000
+ nreloc: 0
+ flags: 0x80000000
+ reserved1: 0x00000000
+ reserved2: 0x00000000
+ reserved3: 0x00000000
+ content: ''
+ - sectname: __data
+ segname: __DATA
+ addr: 0x0000000000000000
+ size: 4
+ offset: 0x000001D0
+ align: 2
+ reloff: 0x00000000
+ nreloc: 0
+ flags: 0x00000000
+ reserved1: 0x00000000
+ reserved2: 0x00000000
+ reserved3: 0x00000000
+ content: '01000000'
+ - sectname: C
+ segname: __DATA
+ addr: 0x0000000000000004
+ size: 4
+ offset: 0x000001D4
+ align: 2
+ reloff: 0x00000000
+ nreloc: 0
+ flags: 0x00000000
+ reserved1: 0x00000000
+ reserved2: 0x00000000
+ reserved3: 0x00000000
+ content: '02000000'
+ - cmd: LC_VERSION_MIN_MACOSX
+ cmdsize: 16
+ version: 658944
+ sdk: 0
+ - cmd: LC_SYMTAB
+ cmdsize: 24
+ symoff: 472
+ nsyms: 2
+ stroff: 504
+ strsize: 8
+ - cmd: LC_DYSYMTAB
+ cmdsize: 80
+ ilocalsym: 0
+ nlocalsym: 0
+ iextdefsym: 0
+ nextdefsym: 2
+ iundefsym: 2
+ nundefsym: 0
+ tocoff: 0
+ ntoc: 0
+ modtaboff: 0
+ nmodtab: 0
+ extrefsymoff: 0
+ nextrefsyms: 0
+ indirectsymoff: 0
+ nindirectsyms: 0
+ extreloff: 0
+ nextrel: 0
+ locreloff: 0
+ nlocrel: 0
+LinkEditData:
+ NameList:
+ - n_strx: 4
+ n_type: 0x0F
+ n_sect: 2
+ n_desc: 0
+ n_value: 0
+ - n_strx: 1
+ n_type: 0x0F
+ n_sect: 3
+ n_desc: 0
+ n_value: 4
+ StringTable:
+ - ''
+ - _B
+ - _A
+ - ''
+...
diff --git a/llvm/test/tools/llvm-objcopy/MachO/remove-section-error.test b/llvm/test/tools/llvm-objcopy/MachO/remove-section-error.test
new file mode 100644
index 000000000000..e060212fb2da
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/MachO/remove-section-error.test
@@ -0,0 +1,120 @@
+# RUN: yaml2obj %s -o %t
+# RUN: not llvm-objcopy --remove-section __DATA,C %t /dev/null 2>&1 | FileCheck %s
+
+# CHECK: symbol '_a' defined in section with index '2' cannot be removed because it is referenced by a relocation in section '__TEXT,__text'
+
+## The binary used in this test was built as follows:
+## main.c:
+## __attribute__((section("__DATA,C"))) int a = 2;
+## int f() { return a; }
+## build command:
+## clang -fno-exceptions -fno-unwind-tables -c main.c -o main.o
+
+--- !mach-o
+FileHeader:
+ magic: 0xFEEDFACF
+ cputype: 0x01000007
+ cpusubtype: 0x00000003
+ filetype: 0x00000001
+ ncmds: 4
+ sizeofcmds: 360
+ flags: 0x00002000
+ reserved: 0x00000000
+LoadCommands:
+ - cmd: LC_SEGMENT_64
+ cmdsize: 232
+ segname: ''
+ vmaddr: 0
+ vmsize: 16
+ fileoff: 392
+ filesize: 16
+ maxprot: 7
+ initprot: 7
+ nsects: 2
+ flags: 0
+ Sections:
+ - sectname: __text
+ segname: __TEXT
+ addr: 0x0000000000000000
+ size: 12
+ offset: 0x00000188
+ align: 4
+ reloff: 0x00000198
+ nreloc: 1
+ flags: 0x80000400
+ reserved1: 0x00000000
+ reserved2: 0x00000000
+ reserved3: 0x00000000
+ content: 554889E58B05000000005DC3
+ relocations:
+ - address: 0x00000006
+ symbolnum: 0
+ pcrel: true
+ length: 2
+ extern: true
+ type: 1
+ scattered: false
+ value: 0
+ - sectname: C
+ segname: __DATA
+ addr: 0x000000000000000C
+ size: 4
+ offset: 0x00000194
+ align: 2
+ reloff: 0x00000000
+ nreloc: 0
+ flags: 0x00000000
+ reserved1: 0x00000000
+ reserved2: 0x00000000
+ reserved3: 0x00000000
+ content: '02000000'
+ - cmd: LC_BUILD_VERSION
+ cmdsize: 24
+ platform: 1
+ minos: 658944
+ sdk: 658944
+ ntools: 0
+ - cmd: LC_SYMTAB
+ cmdsize: 24
+ symoff: 416
+ nsyms: 2
+ stroff: 448
+ strsize: 8
+ - cmd: LC_DYSYMTAB
+ cmdsize: 80
+ ilocalsym: 0
+ nlocalsym: 0
+ iextdefsym: 0
+ nextdefsym: 2
+ iundefsym: 2
+ nundefsym: 0
+ tocoff: 0
+ ntoc: 0
+ modtaboff: 0
+ nmodtab: 0
+ extrefsymoff: 0
+ nextrefsyms: 0
+ indirectsymoff: 0
+ nindirectsyms: 0
+ extreloff: 0
+ nextrel: 0
+ locreloff: 0
+ nlocrel: 0
+LinkEditData:
+ NameList:
+ - n_strx: 4
+ n_type: 0x0F
+ n_sect: 2
+ n_desc: 0
+ n_value: 12
+ - n_strx: 1
+ n_type: 0x0F
+ n_sect: 1
+ n_desc: 0
+ n_value: 0
+ StringTable:
+ - ''
+ - _f
+ - _a
+ - ''
+...
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
index 040a3710b92c..c2743748fa13 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -20,7 +20,7 @@ namespace macho {
using namespace object;
using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>;
-static void removeSections(const CopyConfig &Config, Object &Obj) {
+static Error removeSections(const CopyConfig &Config, Object &Obj) {
SectionPred RemovePred = [](const std::unique_ptr<Section> &) { return false; };
if (!Config.ToRemove.empty()) {
@@ -180,7 +180,9 @@ static Error handleArgs(const CopyConfig &Config, Object &Obj) {
return createStringError(llvm::errc::invalid_argument,
"option not supported by llvm-objcopy for MachO");
}
- removeSections(Config, Obj);
+
+ if (Error E = removeSections(Config, Obj))
+ return E;
// Mark symbols to determine which symbols are still needed.
if (Config.StripAll)
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
index 6d08e3abccf5..b4662ec5b5cd 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
@@ -28,10 +28,11 @@ void MachOReader::readHeader(Object &O) const {
}
template <typename SectionType>
-Section constructSectionCommon(SectionType Sec) {
+Section constructSectionCommon(SectionType Sec, uint32_t Index) {
StringRef SegName(Sec.segname, strnlen(Sec.segname, sizeof(Sec.segname)));
StringRef SectName(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)));
Section S(SegName, SectName);
+ S.Index = Index;
S.Addr = Sec.addr;
S.Size = Sec.size;
S.Offset = Sec.offset;
@@ -45,14 +46,15 @@ Section constructSectionCommon(SectionType Sec) {
return S;
}
-template <typename SectionType> Section constructSection(SectionType Sec);
+template <typename SectionType>
+Section constructSection(SectionType Sec, uint32_t Index);
-template <> Section constructSection(MachO::section Sec) {
- return constructSectionCommon(Sec);
+template <> Section constructSection(MachO::section Sec, uint32_t Index) {
+ return constructSectionCommon(Sec, Index);
}
-template <> Section constructSection(MachO::section_64 Sec) {
- Section S = constructSectionCommon(Sec);
+template <> Section constructSection(MachO::section_64 Sec, uint32_t Index) {
+ Section S = constructSectionCommon(Sec, Index);
S.Reserved3 = Sec.reserved3;
return S;
}
@@ -62,7 +64,7 @@ template <typename SectionType, typename SegmentType>
std::vector<std::unique_ptr<Section>>
extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
const object::MachOObjectFile &MachOObj,
- size_t &NextSectionIndex) {
+ uint32_t &NextSectionIndex) {
auto End = LoadCmd.Ptr + LoadCmd.C.cmdsize;
const SectionType *Curr =
reinterpret_cast<const SectionType *>(LoadCmd.Ptr + sizeof(SegmentType));
@@ -72,9 +74,11 @@ extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
SectionType Sec;
memcpy((void *)&Sec, Curr, sizeof(SectionType));
MachO::swapStruct(Sec);
- Sections.push_back(std::make_unique<Section>(constructSection(Sec)));
+ Sections.push_back(
+ std::make_unique<Section>(constructSection(Sec, NextSectionIndex)));
} else {
- Sections.push_back(std::make_unique<Section>(constructSection(*Curr)));
+ Sections.push_back(
+ std::make_unique<Section>(constructSection(*Curr, NextSectionIndex)));
}
Section &S = *Sections.back();
@@ -110,7 +114,7 @@ extractSections(const object::MachOObjectFile::LoadCommandInfo &LoadCmd,
void MachOReader::readLoadCommands(Object &O) const {
// For MachO sections indices start from 1.
- size_t NextSectionIndex = 1;
+ uint32_t NextSectionIndex = 1;
for (auto LoadCmd : MachOObj.load_commands()) {
LoadCommand LC;
switch (LoadCmd.C.cmd) {
@@ -189,12 +193,10 @@ void MachOReader::readSymbolTable(Object &O) const {
for (auto Symbol : MachOObj.symbols()) {
SymbolEntry SE =
(MachOObj.is64Bit()
- ? constructSymbolEntry(
- StrTable,
- MachOObj.getSymbol64TableEntry(Symbol.getRawDataRefImpl()))
- : constructSymbolEntry(
- StrTable,
- MachOObj.getSymbolTableEntry(Symbol.getRawDataRefImpl())));
+ ? constructSymbolEntry(StrTable, MachOObj.getSymbol64TableEntry(
+ Symbol.getRawDataRefImpl()))
+ : constructSymbolEntry(StrTable, MachOObj.getSymbolTableEntry(
+ Symbol.getRawDataRefImpl())));
O.SymTable.Symbols.push_back(std::make_unique<SymbolEntry>(SE));
}
diff --git a/llvm/tools/llvm-objcopy/MachO/Object.cpp b/llvm/tools/llvm-objcopy/MachO/Object.cpp
index 4e2b33d840bc..846098fb7729 100644
--- a/llvm/tools/llvm-objcopy/MachO/Object.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/Object.cpp
@@ -1,5 +1,7 @@
#include "Object.h"
#include "../llvm-objcopy.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <unordered_set>
namespace llvm {
namespace objcopy {
@@ -22,11 +24,41 @@ void SymbolTable::removeSymbols(
std::end(Symbols));
}
-void Object::removeSections(function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
- for (LoadCommand &LC : LoadCommands)
- LC.Sections.erase(std::remove_if(std::begin(LC.Sections),
- std::end(LC.Sections), ToRemove),
- std::end(LC.Sections));
+Error Object::removeSections(
+ function_ref<bool(const std::unique_ptr<Section> &)> ToRemove) {
+ std::unordered_set<uint32_t> RemovedSectionsIndices;
+ for (LoadCommand &LC : LoadCommands) {
+ auto It = std::stable_partition(
+ std::begin(LC.Sections), std::end(LC.Sections),
+ [&](const std::unique_ptr<Section> &Sec) { return !ToRemove(Sec); });
+ for (auto I = It, End = LC.Sections.end(); I != End; ++I)
+ RemovedSectionsIndices.insert((*I)->Index);
+ LC.Sections.erase(It, LC.Sections.end());
+ }
+
+ auto IsDead = [&](const std::unique_ptr<SymbolEntry> &S) -> bool {
+ Optional<uint32_t> Section = S->section();
+ return (Section && RemovedSectionsIndices.count(*Section));
+ };
+
+ SmallPtrSet<const SymbolEntry *, 2> DeadSymbols;
+ for (const std::unique_ptr<SymbolEntry> &Sym : SymTable.Symbols)
+ if (IsDead(Sym))
+ DeadSymbols.insert(Sym.get());
+
+ for (const LoadCommand &LC : LoadCommands)
+ for (const std::unique_ptr<Section> &Sec : LC.Sections)
+ for (const RelocationInfo &R : Sec->Relocations)
+ if (R.Symbol && DeadSymbols.count(R.Symbol))
+ return createStringError(std::errc::invalid_argument,
+ "symbol '%s' defined in section with index "
+ "'%u' cannot be removed because it is "
+ "referenced by a relocation in section '%s'",
+ R.Symbol->Name.c_str(),
+ *(R.Symbol->section()),
+ Sec->CanonicalName.c_str());
+ SymTable.removeSymbols(IsDead);
+ return Error::success();
}
void Object::addLoadCommand(LoadCommand LC) {
diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h
index d0d4554d7560..dc828e66fcbd 100644
--- a/llvm/tools/llvm-objcopy/MachO/Object.h
+++ b/llvm/tools/llvm-objcopy/MachO/Object.h
@@ -37,6 +37,7 @@ struct MachHeader {
struct RelocationInfo;
struct Section {
+ uint32_t Index;
std::string Segname;
std::string Sectname;
// CanonicalName is a string formatted as “<Segname>,<Sectname>".
@@ -83,7 +84,7 @@ struct LoadCommand {
// The raw content of the payload of the load command (located right after the
// corresponding struct). In some cases it is either empty or can be
// copied-over without digging into its structure.
- std::vector<uint8_t> Payload;
+ std::vector<uint8_t> Payload;
// Some load commands can contain (inside the payload) an array of sections,
// though the contents of the sections are stored separately. The struct
@@ -115,6 +116,10 @@ struct SymbolEntry {
bool isUndefinedSymbol() const {
return (n_type & MachO::N_TYPE) == MachO::N_UNDF;
}
+
+ Optional<uint32_t> section() const {
+ return n_sect == MachO::NO_SECT ? None : Optional<uint32_t>(n_sect);
+ }
};
/// The location of the symbol table inside the binary is described by LC_SYMTAB
@@ -306,7 +311,8 @@ struct Object {
Object() : NewSectionsContents(Alloc) {}
- void removeSections(function_ref<bool(const std::unique_ptr<Section> &)> ToRemove);
+ Error
+ removeSections(function_ref<bool(const std::unique_ptr<Section> &)> ToRemove);
void addLoadCommand(LoadCommand LC);
/// Creates a new segment load command in the object and returns a reference
More information about the llvm-commits
mailing list