[PATCH] [lld][mach-o] binary reader and writer
Shankar Kalpathi Easwaran
shankarke at gmail.com
Mon Nov 4 19:45:39 PST 2013
================
Comment at: include/lld/ReaderWriter/MachOLinkingContext.h:123
@@ -118,2 +122,3 @@
void setBundleLoader(StringRef loader) { _bundleLoader = loader; }
+ StringRef dyldPath() { return "/usr/lib/dyld"; }
----------------
const.
================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:133
@@ -128,3 +132,3 @@
_doNothing(false), _arch(arch_unknown), _os(OS::macOSX), _osMinVersion(0),
- _pageZeroSize(0x1000), _compatibilityVersion(0), _currentVersion(0),
+ _pageZeroSize(UINT64_MAX), _compatibilityVersion(0), _currentVersion(0),
_deadStrippableDylib(false), _kindHandler(nullptr) {}
----------------
Why is _pageZeroSize set to UINT64_MAX ?
================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:93-95
@@ -88,5 +92,5 @@
for (ArchInfo *info = archInfos; !info->archName.empty(); ++info) {
if ((info->cputype == cputype) && (info->cpusubtype == cpusubtype)) {
return info->arch;
}
}
----------------
single block if statement - no need of a brace.
================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:30-31
@@ -29,4 +30,4 @@
bool MachOLinkingContext::parsePackedVersion(StringRef str, uint32_t &result) {
result = 0;
----------------
It looks like some of these functions have the intention of
false -> success
true -> failure
There was a previous discussion on inverting return value of functions to mean
true -> success
false -> failure
Are we moving to this model (or) we plan on changing styles later ?
================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:75-87
@@ -72,13 +74,15 @@
static ArchInfo archInfos[] = {
- { "x86_64", MachOLinkingContext::arch_x86_64, llvm::MachO::CPU_TYPE_X86_64,
- llvm::MachO::CPU_SUBTYPE_X86_64_ALL },
- { "i386", MachOLinkingContext::arch_x86, llvm::MachO::CPU_TYPE_I386,
- llvm::MachO::CPU_SUBTYPE_X86_ALL },
- { "armv6", MachOLinkingContext::arch_armv6, llvm::MachO::CPU_TYPE_ARM,
- llvm::MachO::CPU_SUBTYPE_ARM_V6 },
- { "armv7", MachOLinkingContext::arch_armv7, llvm::MachO::CPU_TYPE_ARM,
- llvm::MachO::CPU_SUBTYPE_ARM_V7 },
- { "armv7s", MachOLinkingContext::arch_armv7s, llvm::MachO::CPU_TYPE_ARM,
- llvm::MachO::CPU_SUBTYPE_ARM_V7S },
- { StringRef(), MachOLinkingContext::arch_unknown, 0, 0 }
+ { "x86_64", MachOLinkingContext::arch_x86_64, true,
+ llvm::MachO::CPU_TYPE_X86_64, llvm::MachO::CPU_SUBTYPE_X86_64_ALL },
+ { "i386", MachOLinkingContext::arch_x86, true,
+ llvm::MachO::CPU_TYPE_I386, llvm::MachO::CPU_SUBTYPE_X86_ALL },
+ { "ppc", MachOLinkingContext::arch_ppc, false,
+ llvm::MachO::CPU_TYPE_POWERPC, llvm::MachO::CPU_SUBTYPE_POWERPC_ALL },
+ { "armv6", MachOLinkingContext::arch_armv6, true,
+ llvm::MachO::CPU_TYPE_ARM, llvm::MachO::CPU_SUBTYPE_ARM_V6 },
+ { "armv7", MachOLinkingContext::arch_armv7, true,
+ llvm::MachO::CPU_TYPE_ARM, llvm::MachO::CPU_SUBTYPE_ARM_V7 },
+ { "armv7s", MachOLinkingContext::arch_armv7s, true,
+ llvm::MachO::CPU_TYPE_ARM, llvm::MachO::CPU_SUBTYPE_ARM_V7S },
+ { StringRef(), MachOLinkingContext::arch_unknown, false, 0, 0 }
};
----------------
kledzik at apple.com wrote:
> Michael Spencer wrote:
> > Indentation is weird here.
> Its a really wide table. I'll make each row be three lines so things line up.
Using llvm::MachO can solve a lot of indentation here.
================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:103-105
@@ -98,5 +102,5 @@
for (ArchInfo *info = archInfos; !info->archName.empty(); ++info) {
if (info->archName.equals(archName)) {
return info->arch;
}
}
----------------
same as above.
================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:101
@@ -96,3 +100,3 @@
MachOLinkingContext::Arch
MachOLinkingContext::archFromName(StringRef archName) {
for (ArchInfo *info = archInfos; !info->archName.empty(); ++info) {
----------------
const ?
================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:91
@@ -86,3 +90,3 @@
MachOLinkingContext::Arch
MachOLinkingContext::archFromCpuType(uint32_t cputype, uint32_t cpusubtype) {
for (ArchInfo *info = archInfos; !info->archName.empty(); ++info) {
----------------
const ?
================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:113-115
@@ -108,5 +112,5 @@
for (ArchInfo *info = archInfos; !info->archName.empty(); ++info) {
if (info->arch == arch) {
return info->cputype;
}
}
----------------
same as above.
================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:110
@@ -105,3 +109,3 @@
uint32_t MachOLinkingContext::cpuTypeFromArch(Arch arch) {
assert(arch != arch_unknown);
----------------
const.
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:102-108
@@ +101,9 @@
+
+/// Utility class for writing a mach-o binary file given an in-memory
+/// normalized file.
+class MachOFileLayout {
+public:
+ /// All layout computation is done in the constructor.
+ MachOFileLayout(const NormalizedFile &file);
+
+ /// Returns the final file size as computed in the constructor.
----------------
indentation is off here.
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:110
@@ +109,3 @@
+ /// Returns the final file size as computed in the constructor.
+ size_t size();
+
----------------
const ?
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:220-222
@@ +219,5 @@
+
+StringRef MachOFileLayout::dyldPath() {
+ return "/usr/lib/dyld";
+}
+
----------------
looks like function is duplicated here, you could as well get the info from the context.
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:224-229
@@ +223,8 @@
+
+uint32_t MachOFileLayout::pointerAlign(uint32_t value) {
+ if (_is64)
+ return (value + 7) & -8;
+ else
+ return (value + 3) & -4;
+}
+
----------------
use MathExtras. llvm::RoundUpToAlignment.
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:241-268
@@ +240,30 @@
+
+void MachOFileLayout::ByteBuffer::append_uleb128(uint64_t value) {
+ uint8_t byte;
+ do {
+ byte = value & 0x7F;
+ value &= ~0x7F;
+ if ( value != 0 )
+ byte |= 0x80;
+ _bytes.push_back(byte);
+ value = value >> 7;
+ } while( byte >= 0x80 );
+}
+
+void MachOFileLayout::ByteBuffer::append_sleb128(int64_t value) {
+ bool isNeg = ( value < 0 );
+ uint8_t byte;
+ bool more;
+ do {
+ byte = value & 0x7F;
+ value = value >> 7;
+ if ( isNeg )
+ more = ( (value != -1) || ((byte & 0x40) == 0) );
+ else
+ more = ( (value != 0) || ((byte & 0x40) != 0) );
+ if ( more )
+ byte |= 0x80;
+ _bytes.push_back(byte);
+ }
+ while( more );
+}
----------------
Looks like this has been moved from WriterMachO.cpp ? Do you think it might be good to move this to to llvm/Support ? DWARF also has these types isnt it ?
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp:231-251
@@ +230,23 @@
+
+void Util::appendAtom(SectionInfo *sect, const DefinedAtom *atom) {
+ // Figure out offset for atom in this section given alignment constraints.
+ uint64_t offset = sect->size;
+ DefinedAtom::Alignment atomAlign = atom->alignment();
+ uint64_t align2 = 1 << atomAlign.powerOf2;
+ uint64_t requiredModulus = atomAlign.modulus;
+ uint64_t currentModulus = (offset % align2);
+ if ( currentModulus != requiredModulus ) {
+ if ( requiredModulus > currentModulus )
+ offset += requiredModulus-currentModulus;
+ else
+ offset += align2+requiredModulus-currentModulus;
+ }
+ // Record max alignment of any atom in this section.
+ if ( atomAlign.powerOf2 > sect->alignment )
+ sect->alignment = atomAlign.powerOf2;
+ // Assign atom to this section with this offset.
+ AtomInfo ai = {atom, offset};
+ sect->atomsAndOffsets.push_back(ai);
+ // Update section size to include this atom.
+ sect->size = offset + atom->size();
+}
----------------
looks like even BSS atoms take up file space ? Is that something to be fixed later ?
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp:346-349
@@ +345,6 @@
+
+uint64_t Util::alignTo(uint64_t value, uint8_t align2) {
+ uint64_t align = 1 << align2;
+ return ( (value + (align-1)) & (-align) );
+}
+
----------------
llvm::RoundUpToAlignment.
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp:358
@@ +357,3 @@
+ }
+ seg->size = alignTo(addr - seg->address, 12);
+}
----------------
Align at 12 byte boundaries ?
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp:374
@@ +373,3 @@
+ while (padding < 0)
+ padding += 4096;
+ // Start assigning section address starting at padded offset.
----------------
Do you want to use the context pagesize function here.
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:276-279
@@ +275,6 @@
+
+void MachOFileLayout::ByteBuffer::align(unsigned alignment) {
+ while ( (_bytes.size() % alignment) != 0 )
+ _bytes.push_back(0);
+}
+
----------------
What about when you want to align text ? Does 0 correspond to nop ?
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:322-323
@@ +321,4 @@
+
+ // Align start of relocations.
+ _startOfRelocations = pointerAlign(_endOfSectionsContent);
+ _startOfSymbols = _startOfRelocations + relocCount * 8;
----------------
This is uninitialized in the else part. I believe these are only relevant to object files, but still worried on a lot of uninitialized stuff that may just fall through.
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:460-463
@@ +459,6 @@
+ continue;
+ if (overlaps(sg1,sg2)) {
+ _ec = llvm::make_error_code(llvm::errc::executable_format_error);
+ return;
+ }
+ }
----------------
should this function return error_code instead ?
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:456-461
@@ +455,8 @@
+ // Verify no segments overlap
+ for (const Segment &sg1 : _file.segments) {
+ for (const Segment &sg2 : _file.segments) {
+ if (&sg1 == &sg2)
+ continue;
+ if (overlaps(sg1,sg2)) {
+ _ec = llvm::make_error_code(llvm::errc::executable_format_error);
+ return;
----------------
O(n^2) a sort of segments by their address might help. This part could be moved to verify ?
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:507
@@ +506,3 @@
+ for (const Segment &sg : _file.segments) {
+ _segInfo[&sg].fileOffset = (fileOffset + 4095) & (-4096);
+ if ((_seg1addr == INT64_MAX) && sg.access)
----------------
use llvm::RoundUpToAlignment. May be you can add a function to LinkingContext pageSize().
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:522
@@ +521,3 @@
+ }
+ _startOfLinkEdit = (fileOffset + 4095) & (-4096);
+}
----------------
same here, use llvm::RoundUpToAlignment.
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:526
@@ +525,3 @@
+
+size_t MachOFileLayout::size() {
+ return _endOfSymbolStrings;
----------------
const.
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:530
@@ +529,3 @@
+
+void MachOFileLayout::writeMachHeader() {
+ mach_header *mh = reinterpret_cast<mach_header*>(_buffer);
----------------
writeMachOHeader() ?
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:543-544
@@ +542,4 @@
+
+uint32_t MachOFileLayout::indirectSymbolIndex(const Section §,
+ uint32_t &index) {
+ if (sect.indirectSymbols.empty())
----------------
alignment.
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:571-572
@@ +570,4 @@
+ seg->filesize = seg->vmsize;
+ seg->maxprot = 7;
+ seg->initprot = 7;
+ seg->nsects = _file.sections.size();
----------------
I think these are the initial protection flags for the segment right ? Why are they set to RWX (I thought 7 is coming from there).
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:648
@@ +647,3 @@
+
+error_code MachOFileLayout::writeSegment64LoadCommands(uint8_t *&lc) {
+ uint32_t indirectSymRunningIndex = 0;
----------------
can writeSegment64LoadCommands and writeSegment32LoadCommands be combined ?
================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:1079-1083
@@ +1078,7 @@
+
+void MachOFileLayout::writeLinkEditContent() {
+ if (_file.fileType == llvm::MachO::MH_OBJECT) {
+ writeRelocations();
+ writeSymbolTable();
+ }
+ else {
----------------
wow looks like this patch handles partial linking too ?
http://llvm-reviews.chandlerc.com/D2101
BRANCH
svn
ARCANIST PROJECT
lld
More information about the llvm-commits
mailing list