[PATCH] [lld][mach-o] binary reader and writer

Shankar Kalpathi Easwaran shankarke at gmail.com
Mon Nov 4 19:45:39 PST 2013



================
Comment at: include/lld/ReaderWriter/MachOLinkingContext.h:123
@@ -118,2 +122,3 @@
   void setBundleLoader(StringRef loader) { _bundleLoader = loader; }
+  StringRef dyldPath() { return "/usr/lib/dyld"; }
 
----------------
const. 

================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:133
@@ -128,3 +132,3 @@
       _doNothing(false), _arch(arch_unknown), _os(OS::macOSX), _osMinVersion(0),
-      _pageZeroSize(0x1000), _compatibilityVersion(0), _currentVersion(0),
+      _pageZeroSize(UINT64_MAX), _compatibilityVersion(0), _currentVersion(0),
       _deadStrippableDylib(false), _kindHandler(nullptr) {}
----------------
Why is _pageZeroSize set to UINT64_MAX ?

================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:93-95
@@ -88,5 +92,5 @@
   for (ArchInfo *info = archInfos; !info->archName.empty(); ++info) {
     if ((info->cputype == cputype) && (info->cpusubtype == cpusubtype)) {
       return info->arch;
     }
   }
----------------
single block if statement - no need of a brace.

================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:30-31
@@ -29,4 +30,4 @@
 
 bool MachOLinkingContext::parsePackedVersion(StringRef str, uint32_t &result) {
   result = 0;
 
----------------
It looks like some of these functions have the intention of

false -> success
true -> failure

There was a previous discussion on inverting return value of functions to mean

true -> success
false -> failure

Are we moving to this model (or) we plan on changing styles later ?




================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:75-87
@@ -72,13 +74,15 @@
 static ArchInfo archInfos[] = {
-  { "x86_64", MachOLinkingContext::arch_x86_64, llvm::MachO::CPU_TYPE_X86_64,
-    llvm::MachO::CPU_SUBTYPE_X86_64_ALL },
-  { "i386", MachOLinkingContext::arch_x86, llvm::MachO::CPU_TYPE_I386,
-    llvm::MachO::CPU_SUBTYPE_X86_ALL },
-  { "armv6", MachOLinkingContext::arch_armv6, llvm::MachO::CPU_TYPE_ARM,
-    llvm::MachO::CPU_SUBTYPE_ARM_V6 },
-  { "armv7", MachOLinkingContext::arch_armv7, llvm::MachO::CPU_TYPE_ARM,
-    llvm::MachO::CPU_SUBTYPE_ARM_V7 },
-  { "armv7s", MachOLinkingContext::arch_armv7s, llvm::MachO::CPU_TYPE_ARM,
-    llvm::MachO::CPU_SUBTYPE_ARM_V7S },
-  { StringRef(), MachOLinkingContext::arch_unknown, 0, 0 }
+  { "x86_64", MachOLinkingContext::arch_x86_64, true, 
+          llvm::MachO::CPU_TYPE_X86_64, llvm::MachO::CPU_SUBTYPE_X86_64_ALL },
+  { "i386", MachOLinkingContext::arch_x86,  true, 
+              llvm::MachO::CPU_TYPE_I386, llvm::MachO::CPU_SUBTYPE_X86_ALL },
+  { "ppc", MachOLinkingContext::arch_ppc,  false, 
+          llvm::MachO::CPU_TYPE_POWERPC, llvm::MachO::CPU_SUBTYPE_POWERPC_ALL },
+  { "armv6", MachOLinkingContext::arch_armv6,  true, 
+                  llvm::MachO::CPU_TYPE_ARM, llvm::MachO::CPU_SUBTYPE_ARM_V6 },
+  { "armv7", MachOLinkingContext::arch_armv7,  true, 
+                  llvm::MachO::CPU_TYPE_ARM, llvm::MachO::CPU_SUBTYPE_ARM_V7 },
+  { "armv7s", MachOLinkingContext::arch_armv7s,  true, 
+                  llvm::MachO::CPU_TYPE_ARM, llvm::MachO::CPU_SUBTYPE_ARM_V7S },
+  { StringRef(), MachOLinkingContext::arch_unknown, false, 0, 0 }
 };
----------------
kledzik at apple.com wrote:
> Michael Spencer wrote:
> > Indentation is weird here.
> Its a really wide table.  I'll make each row be three lines so things line up.
Using llvm::MachO can solve a lot of indentation here.

================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:103-105
@@ -98,5 +102,5 @@
   for (ArchInfo *info = archInfos; !info->archName.empty(); ++info) {
     if (info->archName.equals(archName)) {
       return info->arch;
     }
   }
----------------
same as above.

================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:101
@@ -96,3 +100,3 @@
 MachOLinkingContext::Arch
 MachOLinkingContext::archFromName(StringRef archName) {
   for (ArchInfo *info = archInfos; !info->archName.empty(); ++info) {
----------------
const ?

================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:91
@@ -86,3 +90,3 @@
 MachOLinkingContext::Arch
 MachOLinkingContext::archFromCpuType(uint32_t cputype, uint32_t cpusubtype) {
   for (ArchInfo *info = archInfos; !info->archName.empty(); ++info) {
----------------
const ?

================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:113-115
@@ -108,5 +112,5 @@
   for (ArchInfo *info = archInfos; !info->archName.empty(); ++info) {
     if (info->arch == arch) {
       return info->cputype;
     }
   }
----------------
same as above.

================
Comment at: lib/ReaderWriter/MachO/MachOLinkingContext.cpp:110
@@ -105,3 +109,3 @@
 
 uint32_t MachOLinkingContext::cpuTypeFromArch(Arch arch) {
   assert(arch != arch_unknown);
----------------
const.

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:102-108
@@ +101,9 @@
+
+/// Utility class for writing a mach-o binary file given an in-memory
+/// normalized file.  
+class MachOFileLayout {
+public:
+              /// All layout computation is done in the constructor.
+              MachOFileLayout(const NormalizedFile &file);
+              
+  /// Returns the final file size as computed in the constructor.
----------------
indentation is off here.

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:110
@@ +109,3 @@
+  /// Returns the final file size as computed in the constructor.
+  size_t      size();
+
----------------
const ?

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:220-222
@@ +219,5 @@
+
+StringRef MachOFileLayout::dyldPath() {
+  return "/usr/lib/dyld";
+}
+
----------------
looks like function is duplicated here, you could as well get the info from the context.

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:224-229
@@ +223,8 @@
+
+uint32_t MachOFileLayout::pointerAlign(uint32_t value) {
+  if (_is64)
+    return (value + 7) & -8;
+  else
+    return (value + 3) & -4;
+}
+
----------------
use MathExtras. llvm::RoundUpToAlignment.

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:241-268
@@ +240,30 @@
+
+void MachOFileLayout::ByteBuffer::append_uleb128(uint64_t value) {
+  uint8_t byte;
+  do {
+    byte = value & 0x7F;
+    value &= ~0x7F;
+    if ( value != 0 )
+      byte |= 0x80;
+    _bytes.push_back(byte);
+    value = value >> 7;
+  } while( byte >= 0x80 );
+}
+
+void MachOFileLayout::ByteBuffer::append_sleb128(int64_t value) {
+  bool isNeg = ( value < 0 );
+  uint8_t byte;
+  bool more;
+  do {
+    byte = value & 0x7F;
+    value = value >> 7;
+    if ( isNeg ) 
+      more = ( (value != -1) || ((byte & 0x40) == 0) );
+    else
+      more = ( (value != 0) || ((byte & 0x40) != 0) );
+    if ( more )
+      byte |= 0x80;
+    _bytes.push_back(byte);
+  } 
+  while( more );
+}
----------------
Looks like this has been moved from WriterMachO.cpp ? Do you think it might be good to move this to to llvm/Support ? DWARF also has these types isnt it ? 

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp:231-251
@@ +230,23 @@
+
+void Util::appendAtom(SectionInfo *sect, const DefinedAtom *atom) {
+  // Figure out offset for atom in this section given alignment constraints.
+  uint64_t offset = sect->size;
+  DefinedAtom::Alignment atomAlign = atom->alignment();
+  uint64_t align2 = 1 << atomAlign.powerOf2;
+  uint64_t requiredModulus = atomAlign.modulus;
+  uint64_t currentModulus = (offset % align2);
+  if ( currentModulus != requiredModulus ) {
+    if ( requiredModulus > currentModulus )
+      offset += requiredModulus-currentModulus;
+    else
+      offset += align2+requiredModulus-currentModulus;
+  }
+  // Record max alignment of any atom in this section.
+  if ( atomAlign.powerOf2 > sect->alignment )
+    sect->alignment = atomAlign.powerOf2;
+  // Assign atom to this section with this offset.
+  AtomInfo ai = {atom, offset};
+  sect->atomsAndOffsets.push_back(ai);
+  // Update section size to include this atom.
+  sect->size = offset + atom->size();
+}
----------------
looks like even BSS atoms take up file space ? Is that something to be fixed later ?

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp:346-349
@@ +345,6 @@
+
+uint64_t Util::alignTo(uint64_t value, uint8_t align2) {
+  uint64_t align = 1 << align2;
+  return ( (value + (align-1)) & (-align) );
+}
+
----------------
llvm::RoundUpToAlignment.

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp:358
@@ +357,3 @@
+  }
+  seg->size = alignTo(addr - seg->address, 12);
+}
----------------
Align at 12 byte boundaries ?

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp:374
@@ +373,3 @@
+  while (padding < 0)
+    padding += 4096;
+  // Start assigning section address starting at padded offset.
----------------
Do you want to use the context pagesize function here.

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:276-279
@@ +275,6 @@
+
+void MachOFileLayout::ByteBuffer::align(unsigned alignment) {
+  while ( (_bytes.size() % alignment) != 0 )
+    _bytes.push_back(0);
+}
+
----------------
What about when you want to align text ? Does 0 correspond to nop ?

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:322-323
@@ +321,4 @@
+    
+    // Align start of relocations.
+    _startOfRelocations = pointerAlign(_endOfSectionsContent);    
+    _startOfSymbols = _startOfRelocations + relocCount * 8;
----------------
This is uninitialized in the else part. I believe these are only relevant to object files, but still worried on a lot of uninitialized stuff that may just fall through.

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:460-463
@@ +459,6 @@
+        continue;
+      if (overlaps(sg1,sg2)) {
+        _ec = llvm::make_error_code(llvm::errc::executable_format_error);
+        return;
+      }
+    }
----------------
should this function return error_code instead ?

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:456-461
@@ +455,8 @@
+  // Verify no segments overlap
+  for (const Segment &sg1 : _file.segments) {
+    for (const Segment &sg2 : _file.segments) {
+      if (&sg1 == &sg2)
+        continue;
+      if (overlaps(sg1,sg2)) {
+        _ec = llvm::make_error_code(llvm::errc::executable_format_error);
+        return;
----------------
O(n^2) a sort of segments by their address might help. This part could be moved to verify ?

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:507
@@ +506,3 @@
+  for (const Segment &sg : _file.segments) {
+    _segInfo[&sg].fileOffset = (fileOffset + 4095) & (-4096);
+    if ((_seg1addr == INT64_MAX) && sg.access)
----------------
use llvm::RoundUpToAlignment. May be you can add a function to LinkingContext pageSize().

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:522
@@ +521,3 @@
+  }
+  _startOfLinkEdit = (fileOffset + 4095) & (-4096);
+}
----------------
same here, use llvm::RoundUpToAlignment.

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:526
@@ +525,3 @@
+
+size_t MachOFileLayout::size() {
+  return _endOfSymbolStrings;
----------------
const.

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:530
@@ +529,3 @@
+
+void MachOFileLayout::writeMachHeader() {
+  mach_header *mh = reinterpret_cast<mach_header*>(_buffer);
----------------
writeMachOHeader() ?

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:543-544
@@ +542,4 @@
+
+uint32_t MachOFileLayout::indirectSymbolIndex(const Section &sect, 
+                                                            uint32_t &index) {
+  if (sect.indirectSymbols.empty())
----------------
alignment.

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:571-572
@@ +570,4 @@
+  seg->filesize = seg->vmsize;
+  seg->maxprot = 7;
+  seg->initprot = 7;
+  seg->nsects = _file.sections.size();
----------------
I think these are the initial protection flags for the segment right ? Why are they set to RWX (I thought 7 is coming from there).

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:648
@@ +647,3 @@
+
+error_code MachOFileLayout::writeSegment64LoadCommands(uint8_t *&lc) {
+  uint32_t indirectSymRunningIndex = 0;
----------------
can writeSegment64LoadCommands and writeSegment32LoadCommands be combined ?

================
Comment at: lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp:1079-1083
@@ +1078,7 @@
+
+void MachOFileLayout::writeLinkEditContent() {
+  if (_file.fileType == llvm::MachO::MH_OBJECT) {
+    writeRelocations();
+    writeSymbolTable();
+  }
+  else {
----------------
wow looks like this patch handles partial linking too ?


http://llvm-reviews.chandlerc.com/D2101

BRANCH
  svn

ARCANIST PROJECT
  lld



More information about the llvm-commits mailing list