[llvm] r362863 - [llvm-objcopy][MachO] Recompute and update offset/size fields in the writer
Seiya Nuta via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 7 18:22:55 PDT 2019
Author: seiya
Date: Fri Jun 7 18:22:54 2019
New Revision: 362863
URL: http://llvm.org/viewvc/llvm-project?rev=362863&view=rev
Log:
[llvm-objcopy][MachO] Recompute and update offset/size fields in the writer
Summary:
Recompute and update offset/size fields so that we can implement llvm-objcopy options like --only-section.
This patch is the first step and focuses on supporting the load commands that are covered by existing tests; executable files and
dynamic libraries are not supported yet.
Reviewers: alexshap, rupprecht, jhenderson
Reviewed By: alexshap, rupprecht
Subscribers: compnerd, jakehehrlich, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D62652
Added:
llvm/trunk/test/tools/llvm-objcopy/MachO/Inputs/various-symbols.s
Modified:
llvm/trunk/test/tools/llvm-objcopy/MachO/real-world-input-copy.test
llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp
llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp
llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h
llvm/trunk/tools/llvm-objcopy/MachO/Object.h
Added: llvm/trunk/test/tools/llvm-objcopy/MachO/Inputs/various-symbols.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-objcopy/MachO/Inputs/various-symbols.s?rev=362863&view=auto
==============================================================================
--- llvm/trunk/test/tools/llvm-objcopy/MachO/Inputs/various-symbols.s (added)
+++ llvm/trunk/test/tools/llvm-objcopy/MachO/Inputs/various-symbols.s Fri Jun 7 18:22:54 2019
@@ -0,0 +1,23 @@
+# static int i; // A local symbol (_i, emitted by the .zerofill directive below).
+# int f(void) { return i; } // An external symbol (_f, marked .globl below).
+
+ .section __TEXT,__text,regular,pure_instructions
+ .build_version macos, 10, 14
+ .globl _f ## -- Begin function f
+ .p2align 4, 0x90
+_f: ## @f
+ .cfi_startproc
+## %bb.0:
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ movl _i(%rip), %eax
+ popq %rbp
+ retq
+ .cfi_endproc
+ ## -- End function
+.zerofill __DATA,__bss,_i,4,2 ## @i
+
+.subsections_via_symbols
Modified: llvm/trunk/test/tools/llvm-objcopy/MachO/real-world-input-copy.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-objcopy/MachO/real-world-input-copy.test?rev=362863&r1=362862&r2=362863&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-objcopy/MachO/real-world-input-copy.test (original)
+++ llvm/trunk/test/tools/llvm-objcopy/MachO/real-world-input-copy.test Fri Jun 7 18:22:54 2019
@@ -6,4 +6,10 @@
# RUN: llvm-objcopy %t.64.o %t.64.copy.o
# RUN: cmp %t.64.o %t.64.copy.o
+# Make sure that it properly constructs LC_DYSYMTAB and handles virtual sections
+# (the bss section for example).
+# RUN: llvm-mc -assemble -triple x86_64-apple-darwin9 -filetype=obj %p/Inputs/various-symbols.s -o %t.various-symbols.o
+# RUN: llvm-objcopy %t.various-symbols.o %t.various-symbols.copy.o
+# RUN: cmp %t.various-symbols.o %t.various-symbols.copy.o
+
# REQUIRES: x86-registered-target
Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp?rev=362863&r1=362862&r2=362863&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOObjcopy.cpp Fri Jun 7 18:22:54 2019
@@ -58,6 +58,8 @@ Error executeObjcopyOnBinary(const CopyC
return createFileError(Config.InputFilename, std::move(E));
MachOWriter Writer(*O, In.is64Bit(), In.isLittleEndian(), Out);
+ if (auto E = Writer.finalize())
+ return E;
return Writer.write();
}
Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp?rev=362863&r1=362862&r2=362863&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOReader.cpp Fri Jun 7 18:22:54 2019
@@ -30,8 +30,11 @@ void MachOReader::readHeader(Object &O)
template <typename SectionType>
Section constructSectionCommon(SectionType Sec) {
Section S;
- memcpy(S.Sectname, Sec.sectname, sizeof(Sec.sectname));
- memcpy(S.Segname, Sec.segname, sizeof(Sec.segname));
+ S.Sectname =
+ StringRef(Sec.sectname, strnlen(Sec.sectname, sizeof(Sec.sectname)))
+ .str();
+ S.Segname =
+ StringRef(Sec.segname, strnlen(Sec.segname, sizeof(Sec.sectname))).str();
S.Addr = Sec.addr;
S.Size = Sec.size;
S.Offset = Sec.offset;
@@ -79,7 +82,6 @@ extractSections(const object::MachOObjec
Section &S = Sections.back();
- StringRef SectName(S.Sectname);
Expected<object::SectionRef> SecRef =
MachOObj.getSection(NextSectionIndex++);
if (!SecRef)
Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp?rev=362863&r1=362862&r2=362863&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.cpp Fri Jun 7 18:22:54 2019
@@ -11,6 +11,8 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Object/MachO.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorHandling.h"
#include <memory>
namespace llvm {
@@ -128,13 +130,35 @@ void MachOWriter::writeHeader() {
void MachOWriter::writeLoadCommands() {
uint8_t *Begin = B.getBufferStart() + headerSize();
- MachO::macho_load_command MLC;
for (const auto &LC : O.LoadCommands) {
+ // Construct a load command.
+ MachO::macho_load_command MLC = LC.MachOLoadCommand;
+ switch (MLC.load_command_data.cmd) {
+ case MachO::LC_SEGMENT:
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(MLC.segment_command_data);
+ memcpy(Begin, &MLC.segment_command_data, sizeof(MachO::segment_command));
+ Begin += sizeof(MachO::segment_command);
+
+ for (const auto &Sec : LC.Sections)
+ writeSectionInLoadCommand<MachO::section>(Sec, Begin);
+ continue;
+ case MachO::LC_SEGMENT_64:
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ MachO::swapStruct(MLC.segment_command_64_data);
+ memcpy(Begin, &MLC.segment_command_64_data,
+ sizeof(MachO::segment_command_64));
+ Begin += sizeof(MachO::segment_command_64);
+
+ for (const auto &Sec : LC.Sections)
+ writeSectionInLoadCommand<MachO::section_64>(Sec, Begin);
+ continue;
+ }
+
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
case MachO::LCName: \
assert(sizeof(MachO::LCStruct) + LC.Payload.size() == \
- LC.MachOLoadCommand.load_command_data.cmdsize); \
- MLC = LC.MachOLoadCommand; \
+ MLC.load_command_data.cmdsize); \
if (IsLittleEndian != sys::IsLittleEndianHost) \
MachO::swapStruct(MLC.LCStruct##_data); \
memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct)); \
@@ -143,11 +167,11 @@ void MachOWriter::writeLoadCommands() {
Begin += LC.Payload.size(); \
break;
- switch (LC.MachOLoadCommand.load_command_data.cmd) {
+ // Copy the load command as it is.
+ switch (MLC.load_command_data.cmd) {
default:
assert(sizeof(MachO::load_command) + LC.Payload.size() ==
- LC.MachOLoadCommand.load_command_data.cmdsize);
- MLC = LC.MachOLoadCommand;
+ MLC.load_command_data.cmdsize);
if (IsLittleEndian != sys::IsLittleEndianHost)
MachO::swapStruct(MLC.load_command_data);
memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command));
@@ -160,9 +184,37 @@ void MachOWriter::writeLoadCommands() {
}
}
+// Serializes Sec as a section header of type StructType (instantiated by
+// writeLoadCommands() with MachO::section and MachO::section_64) at Out, and
+// advances Out past the bytes that were written.
+template <typename StructType>
+void MachOWriter::writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out) {
+  StructType Header;
+  assert(Sec.Segname.size() <= sizeof(Header.segname) &&
+         "too long segment name");
+  assert(Sec.Sectname.size() <= sizeof(Header.sectname) &&
+         "too long section name");
+
+  // Zero the whole struct first so that names shorter than the fixed-size
+  // name fields are left NUL-padded.
+  memset(&Header, 0, sizeof(Header));
+  memcpy(Header.sectname, Sec.Sectname.data(), Sec.Sectname.size());
+  memcpy(Header.segname, Sec.Segname.data(), Sec.Segname.size());
+
+  Header.addr = Sec.Addr;
+  Header.size = Sec.Size;
+  Header.offset = Sec.Offset;
+  Header.align = Sec.Align;
+  Header.reloff = Sec.RelOff;
+  Header.nreloc = Sec.NReloc;
+  Header.flags = Sec.Flags;
+  Header.reserved1 = Sec.Reserved1;
+  Header.reserved2 = Sec.Reserved2;
+
+  // The fields above are in host byte order; swap them when the output uses
+  // the opposite endianness.
+  const bool NeedsSwap = IsLittleEndian != sys::IsLittleEndianHost;
+  if (NeedsSwap)
+    MachO::swapStruct(Header);
+
+  memcpy(Out, &Header, sizeof(Header));
+  Out += sizeof(Header);
+}
+
void MachOWriter::writeSections() {
for (const auto &LC : O.LoadCommands)
for (const auto &Sec : LC.Sections) {
+ if (Sec.isVirtualSection())
+ continue;
+
assert(Sec.Offset && "Section offset can not be zero");
assert((Sec.Size == Sec.Content.size()) && "Incorrect section size");
memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(),
@@ -333,6 +385,184 @@ void MachOWriter::writeTail() {
(this->*WriteOp.second)();
}
+// Recomputes O.Header.SizeOfCmds as the sum of the sizes of all load commands.
+//
+// Segment commands are sized from their current section list. Every other
+// command listed in MachO.def is sized as its fixed struct plus its trailing
+// payload, which is the same cmdsize relation that writeLoadCommands()
+// asserts. Commands that are not listed in MachO.def contribute nothing.
+void MachOWriter::updateSizeOfCmds() {
+  // Unsigned accumulator: every term added below is an unsigned byte count.
+  uint32_t Size = 0;
+  for (const auto &LC : O.LoadCommands) {
+    const auto Cmd = LC.MachOLoadCommand.load_command_data.cmd;
+
+    // Segments: the section headers follow the segment command directly.
+    switch (Cmd) {
+    case MachO::LC_SEGMENT:
+      Size += sizeof(MachO::segment_command) +
+              sizeof(MachO::section) * LC.Sections.size();
+      continue;
+    case MachO::LC_SEGMENT_64:
+      Size += sizeof(MachO::segment_command_64) +
+              sizeof(MachO::section_64) * LC.Sections.size();
+      continue;
+    }
+
+    // Everything else: fixed-size struct followed by the raw payload bytes.
+    switch (Cmd) {
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
+  case MachO::LCName:                                                          \
+    Size += sizeof(MachO::LCStruct) + LC.Payload.size();                       \
+    break;
+#include "llvm/BinaryFormat/MachO.def"
+#undef HANDLE_LOAD_COMMAND
+    }
+  }
+
+  O.Header.SizeOfCmds = Size;
+}
+
+// Rewrites the index and count fields for local, externally defined and
+// undefined symbols in a dysymtab command from the current symbol table.
+// MLC is assumed to be an LC_DYSYMTAB command, and the nlist entries in the
+// symbol table are assumed to already be sorted by those types, in that order.
+void MachOWriter::updateDySymTab(MachO::macho_load_command &MLC) {
+  auto Cur = O.SymTable.NameList.begin();
+  const auto Last = O.SymTable.NameList.end();
+
+  // Leading run of entries that have neither N_EXT nor N_PEXT set.
+  uint32_t LocalCount = 0;
+  while (Cur != Last && !(Cur->n_type & (MachO::N_EXT | MachO::N_PEXT))) {
+    ++LocalCount;
+    ++Cur;
+  }
+
+  // Following run of entries whose type bits are anything other than N_UNDF.
+  uint32_t ExtDefCount = 0;
+  while (Cur != Last && (Cur->n_type & MachO::N_TYPE) != MachO::N_UNDF) {
+    ++ExtDefCount;
+    ++Cur;
+  }
+
+  // Whatever remains after the two runs is counted as undefined.
+  const uint32_t FirstUndef = LocalCount + ExtDefCount;
+  auto &DySymTab = MLC.dysymtab_command_data;
+  DySymTab.ilocalsym = 0;
+  DySymTab.nlocalsym = LocalCount;
+  DySymTab.iextdefsym = LocalCount;
+  DySymTab.nextdefsym = ExtDefCount;
+  DySymTab.iundefsym = FirstUndef;
+  DySymTab.nundefsym = O.SymTable.NameList.size() - FirstUndef;
+}
+
+// Recomputes and updates offset and size fields in load commands and sections
+// since they could be modified.
+//
+// Offsets are assigned in this order: header, load commands, section contents
+// (segment by segment), relocations, then the symbol table followed by the
+// string table.
+//
+// Returns an errc::not_supported error for any load command this function does
+// not know how to update, and for LC_DYSYMTAB commands whose ntoc, nmodtab,
+// nextrefsyms, nlocrel, nextrel or nindirectsyms field is non-zero.
+Error MachOWriter::layout() {
+  auto SizeOfCmds = loadCommandsSize();
+  auto Offset = headerSize() + SizeOfCmds;
+  O.Header.NCmds = O.LoadCommands.size();
+  O.Header.SizeOfCmds = SizeOfCmds;
+
+  // Lay out sections.
+  for (auto &LC : O.LoadCommands) {
+    uint64_t FileOff = Offset;
+    uint64_t VMSize = 0;
+    uint64_t FileOffsetInSegment = 0;
+    for (auto &Sec : LC.Sections) {
+      // Virtual sections get no file offset and keep their existing size; they
+      // only contribute to the VM size computed below.
+      if (!Sec.isVirtualSection()) {
+        // Sec.Align is used as a power-of-two exponent. Shift a 64-bit one so
+        // that exponents of 31 and above do not overflow a signed int.
+        auto FilePaddingSize =
+            OffsetToAlignment(FileOffsetInSegment, uint64_t(1) << Sec.Align);
+        Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize;
+        Sec.Size = Sec.Content.size();
+        FileOffsetInSegment += FilePaddingSize + Sec.Size;
+      }
+
+      VMSize = std::max(VMSize, Sec.Addr + Sec.Size);
+    }
+
+    // TODO: Handle the __PAGEZERO segment.
+    auto &MLC = LC.MachOLoadCommand;
+    switch (MLC.load_command_data.cmd) {
+    case MachO::LC_SEGMENT:
+      MLC.segment_command_data.cmdsize =
+          sizeof(MachO::segment_command) +
+          sizeof(MachO::section) * LC.Sections.size();
+      MLC.segment_command_data.nsects = LC.Sections.size();
+      MLC.segment_command_data.fileoff = FileOff;
+      MLC.segment_command_data.vmsize = VMSize;
+      MLC.segment_command_data.filesize = FileOffsetInSegment;
+      break;
+    case MachO::LC_SEGMENT_64:
+      MLC.segment_command_64_data.cmdsize =
+          sizeof(MachO::segment_command_64) +
+          sizeof(MachO::section_64) * LC.Sections.size();
+      MLC.segment_command_64_data.nsects = LC.Sections.size();
+      MLC.segment_command_64_data.fileoff = FileOff;
+      MLC.segment_command_64_data.vmsize = VMSize;
+      MLC.segment_command_64_data.filesize = FileOffsetInSegment;
+      break;
+    }
+
+    // Non-segment commands have no sections, so this adds zero for them.
+    Offset += FileOffsetInSegment;
+  }
+
+  // Lay out relocations.
+  for (auto &LC : O.LoadCommands)
+    for (auto &Sec : LC.Sections) {
+      // A section without relocations gets a zero relocation offset.
+      Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset;
+      Sec.NReloc = Sec.Relocations.size();
+      Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc;
+    }
+
+  // Lay out tail stuff.
+  auto NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
+  for (auto &LC : O.LoadCommands) {
+    auto &MLC = LC.MachOLoadCommand;
+    auto cmd = MLC.load_command_data.cmd;
+    switch (cmd) {
+    case MachO::LC_SYMTAB:
+      // The symbol table comes first and the string table follows it.
+      // strsize is taken from the existing command; it is not recomputed here.
+      MLC.symtab_command_data.symoff = Offset;
+      MLC.symtab_command_data.nsyms = O.SymTable.NameList.size();
+      Offset += NListSize * MLC.symtab_command_data.nsyms;
+      MLC.symtab_command_data.stroff = Offset;
+      Offset += MLC.symtab_command_data.strsize;
+      break;
+    case MachO::LC_DYSYMTAB: {
+      if (MLC.dysymtab_command_data.ntoc != 0 ||
+          MLC.dysymtab_command_data.nmodtab != 0 ||
+          MLC.dysymtab_command_data.nextrefsyms != 0 ||
+          MLC.dysymtab_command_data.nlocrel != 0 ||
+          MLC.dysymtab_command_data.nextrel != 0)
+        return createStringError(llvm::errc::not_supported,
+                                 "shared library is not yet supported");
+
+      if (MLC.dysymtab_command_data.nindirectsyms != 0)
+        return createStringError(llvm::errc::not_supported,
+                                 "indirect symbol table is not yet supported");
+
+      updateDySymTab(MLC);
+      break;
+    }
+    case MachO::LC_SEGMENT:
+    case MachO::LC_SEGMENT_64:
+    case MachO::LC_VERSION_MIN_MACOSX:
+    case MachO::LC_BUILD_VERSION:
+    case MachO::LC_ID_DYLIB:
+    case MachO::LC_LOAD_DYLIB:
+    case MachO::LC_UUID:
+    case MachO::LC_SOURCE_VERSION:
+      // Nothing to update.
+      break;
+    default:
+      // Abort if it's unsupported in order to prevent corrupting the object.
+      return createStringError(llvm::errc::not_supported,
+                               "unsupported load command (cmd=0x%x)", cmd);
+    }
+  }
+
+  return Error::success();
+}
+
+// Prepares the object for writing: refreshes the header's SizeOfCmds and then
+// recomputes the file layout. Whatever layout() reports, success or error, is
+// returned to the caller unchanged.
+Error MachOWriter::finalize() {
+  updateSizeOfCmds();
+  return layout();
+}
+
Error MachOWriter::write() {
if (Error E = B.allocate(totalSize()))
return E;
Modified: llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h?rev=362863&r1=362862&r2=362863&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/MachOWriter.h Fri Jun 7 18:22:54 2019
@@ -29,8 +29,14 @@ class MachOWriter {
size_t symTableSize() const;
size_t strTableSize() const;
+ void updateDySymTab(MachO::macho_load_command &MLC);
+ void updateSizeOfCmds();
+ Error layout();
+
void writeHeader();
void writeLoadCommands();
+ template <typename StructType>
+ void writeSectionInLoadCommand(const Section &Sec, uint8_t *&Out);
void writeSections();
void writeSymbolTable();
void writeStringTable();
@@ -46,6 +52,7 @@ public:
: O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian), B(B) {}
size_t totalSize() const;
+ Error finalize();
Error write();
};
Modified: llvm/trunk/tools/llvm-objcopy/MachO/Object.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-objcopy/MachO/Object.h?rev=362863&r1=362862&r2=362863&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-objcopy/MachO/Object.h (original)
+++ llvm/trunk/tools/llvm-objcopy/MachO/Object.h Fri Jun 7 18:22:54 2019
@@ -34,8 +34,8 @@ struct MachHeader {
};
struct Section {
- char Sectname[16];
- char Segname[16];
+ std::string Sectname;
+ std::string Segname;
uint64_t Addr;
uint64_t Size;
uint32_t Offset;
@@ -49,6 +49,16 @@ struct Section {
StringRef Content;
std::vector<MachO::any_relocation_info> Relocations;
+
+  // Returns the section type, i.e. the bits of Flags selected by the
+  // MachO::SECTION_TYPE mask.
+  MachO::SectionType getType() const {
+    const auto TypeBits = Flags & MachO::SECTION_TYPE;
+    return static_cast<MachO::SectionType>(TypeBits);
+  }
+
+  // Returns true when the section type is one of the three zerofill kinds.
+  bool isVirtualSection() const {
+    switch (getType()) {
+    case MachO::S_ZEROFILL:
+    case MachO::S_GB_ZEROFILL:
+    case MachO::S_THREAD_LOCAL_ZEROFILL:
+      return true;
+    default:
+      return false;
+    }
+  }
};
struct LoadCommand {
More information about the llvm-commits
mailing list