[lld] r213901 - [mach-o] Add support for LC_DATA_IN_CODE

Nick Kledzik kledzik at apple.com
Thu Jul 24 16:06:57 PDT 2014


Author: kledzik
Date: Thu Jul 24 18:06:56 2014
New Revision: 213901

URL: http://llvm.org/viewvc/llvm-project?rev=213901&view=rev
Log:
[mach-o] Add support for LC_DATA_IN_CODE

Sometimes compilers emit data into code sections (e.g. constant pools or
jump tables). These runs of data can throw off disassemblers.  The solution
in mach-o is that ranges of data-in-code are encoded into a table pointed to
by the LC_DATA_IN_CODE load command.

The way the data-in-code information is encoded into lld's Atom model is that
the start and end of each data run are each marked with a Reference whose offset
is the start/end of the data run.  For arm, the switch back to code also marks
whether it is thumb or arm code.

Added:
    lld/trunk/test/mach-o/parse-data-in-code-armv7.yaml
    lld/trunk/test/mach-o/parse-data-in-code-x86.yaml
Modified:
    lld/trunk/lib/ReaderWriter/MachO/ArchHandler.h
    lld/trunk/lib/ReaderWriter/MachO/ArchHandler_arm.cpp
    lld/trunk/lib/ReaderWriter/MachO/ArchHandler_x86.cpp
    lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFile.h
    lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp
    lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp
    lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp
    lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp
    lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp

Modified: lld/trunk/lib/ReaderWriter/MachO/ArchHandler.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/MachO/ArchHandler.h?rev=213901&r1=213900&r2=213901&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/MachO/ArchHandler.h (original)
+++ lld/trunk/lib/ReaderWriter/MachO/ArchHandler.h Thu Jul 24 18:06:56 2014
@@ -140,6 +140,30 @@ public:
   /// Add arch-specific References.
   virtual void addAdditionalReferences(MachODefinedAtom &atom) { }
 
+  // Add Reference for data-in-code marker.
+  virtual void addDataInCodeReference(MachODefinedAtom &atom, uint32_t atomOff,
+                                      uint16_t length, uint16_t kind) { }
+
+  /// Returns true if the specified Reference value marks the start or end
+  /// of a data-in-code range in an atom.
+  virtual bool isDataInCodeTransition(Reference::KindValue refKind) {
+    return false;
+  }
+
+  /// Returns the Reference value for a Reference that marks the start of
+  /// a data-in-code range.
+  virtual Reference::KindValue dataInCodeTransitionStart(
+                                                const MachODefinedAtom &atom) {
+    return 0;
+  }
+
+  /// Returns the Reference value for a Reference that marks the end of
+  /// a data-in-code range.
+  virtual Reference::KindValue dataInCodeTransitionEnd(
+                                                const MachODefinedAtom &atom) {
+    return 0;
+  }
+
   /// Only relevant for 32-bit arm archs.
   virtual bool isThumbFunction(const DefinedAtom &atom) { return false; }
 

Modified: lld/trunk/lib/ReaderWriter/MachO/ArchHandler_arm.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/MachO/ArchHandler_arm.cpp?rev=213901&r1=213900&r2=213901&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/MachO/ArchHandler_arm.cpp (original)
+++ lld/trunk/lib/ReaderWriter/MachO/ArchHandler_arm.cpp Thu Jul 24 18:06:56 2014
@@ -71,6 +71,28 @@ public:
 
   void addAdditionalReferences(MachODefinedAtom &atom) override;
 
+  bool isDataInCodeTransition(Reference::KindValue refKind) override {
+    switch (refKind) {
+    case modeThumbCode:
+    case modeArmCode:
+    case modeData:
+      return true;
+    default:
+      return false;
+      break;
+    }
+  }
+
+  Reference::KindValue dataInCodeTransitionStart(
+                                        const MachODefinedAtom &atom) override {
+    return modeData;
+  }
+
+  Reference::KindValue dataInCodeTransitionEnd(
+                                        const MachODefinedAtom &atom) override {
+    return atom.isThumb() ? modeThumbCode : modeArmCode;
+  }
+
   bool isThumbFunction(const DefinedAtom &atom) override;
 
 private:
@@ -82,6 +104,7 @@ private:
 
     modeThumbCode,         /// Content starting at this offset is thumb.
     modeArmCode,           /// Content starting at this offset is arm.
+    modeData,              /// Content starting at this offset is data.
 
     // Kinds found in mach-o .o files:
     thumb_b22,             /// ex: bl _foo
@@ -143,6 +166,7 @@ ArchHandler_arm::~ArchHandler_arm() { }
 const Registry::KindStrings ArchHandler_arm::_sKindStrings[] = {
   LLD_KIND_STRING_ENTRY(modeThumbCode),
   LLD_KIND_STRING_ENTRY(modeArmCode),
+  LLD_KIND_STRING_ENTRY(modeData),
   LLD_KIND_STRING_ENTRY(thumb_b22),
   LLD_KIND_STRING_ENTRY(thumb_movw),
   LLD_KIND_STRING_ENTRY(thumb_movt),
@@ -735,6 +759,8 @@ void ArchHandler_arm::applyFixupFinal(co
   case modeArmCode:
     thumbMode = false;
     break;
+  case modeData:
+    break;
   case thumb_b22:
     assert(thumbMode);
     displacement = (targetAddress - (fixupAddress + 4)) + ref.addend();
@@ -868,6 +894,8 @@ void ArchHandler_arm::applyFixupRelocata
   case modeArmCode:
     thumbMode = false;
     break;
+  case modeData:
+    break;
   case thumb_b22:
     assert(thumbMode);
     if (useExternalReloc)
@@ -971,6 +999,8 @@ void ArchHandler_arm::appendSectionReloc
   switch (ref.kindValue()) {
   case modeThumbCode:
   case modeArmCode:
+  case modeData:
+    break;
     // Do nothing.
     break;
   case thumb_b22:
@@ -1174,7 +1204,7 @@ bool ArchHandler_arm::isThumbFunction(co
       return false;
     if (ref->kindNamespace() != Reference::KindNamespace::mach_o)
       continue;
-     assert(ref->kindArch() == Reference::KindArch::ARM);
+    assert(ref->kindArch() == Reference::KindArch::ARM);
     if (ref->kindValue() == modeThumbCode)
       return true;
   }

Modified: lld/trunk/lib/ReaderWriter/MachO/ArchHandler_x86.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/MachO/ArchHandler_x86.cpp?rev=213901&r1=213900&r2=213901&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/MachO/ArchHandler_x86.cpp (original)
+++ lld/trunk/lib/ReaderWriter/MachO/ArchHandler_x86.cpp Thu Jul 24 18:06:56 2014
@@ -69,6 +69,27 @@ public:
                                 FindAddressForAtom addressForAtom,
                                 normalized::Relocations &relocs) override;
 
+  bool isDataInCodeTransition(Reference::KindValue refKind) override {
+    switch (refKind) {
+    case modeCode:
+    case modeData:
+      return true;
+    default:
+      return false;
+      break;
+    }
+  }
+
+  Reference::KindValue dataInCodeTransitionStart(
+                                        const MachODefinedAtom &atom) override {
+    return modeData;
+  }
+
+  Reference::KindValue dataInCodeTransitionEnd(
+                                        const MachODefinedAtom &atom) override {
+    return modeCode;
+  }
+
 private:
   static const Registry::KindStrings _sKindStrings[];
   static const StubInfo              _sStubInfo;
@@ -76,6 +97,9 @@ private:
   enum : Reference::KindValue {
     invalid,               /// for error condition
 
+    modeCode,              /// Content starting at this offset is code.
+    modeData,              /// Content starting at this offset is data.
+
     // Kinds found in mach-o .o files:
     branch32,              /// ex: call _foo
     branch16,              /// ex: callw _foo
@@ -115,6 +139,8 @@ ArchHandler_x86::~ArchHandler_x86() { }
   
 const Registry::KindStrings ArchHandler_x86::_sKindStrings[] = {
   LLD_KIND_STRING_ENTRY(invalid),
+  LLD_KIND_STRING_ENTRY(modeCode),
+  LLD_KIND_STRING_ENTRY(modeData),
   LLD_KIND_STRING_ENTRY(branch32),
   LLD_KIND_STRING_ENTRY(branch16),
   LLD_KIND_STRING_ENTRY(abs32),
@@ -390,6 +416,8 @@ void ArchHandler_x86::applyFixupFinal(co
   case negDelta32:
     write32(*loc32, _swap, fixupAddress - targetAddress + ref.addend());
     break;
+  case modeCode:
+  case modeData:
   case lazyPointer:
   case lazyImmediateLocation:
     // do nothing
@@ -434,6 +462,8 @@ void ArchHandler_x86::applyFixupRelocata
   case negDelta32:
     write32(*loc32, _swap, fixupAddress - targetAddress + ref.addend());
     break;
+  case modeCode:
+  case modeData:
   case lazyPointer:
   case lazyImmediateLocation:
     // do nothing
@@ -480,6 +510,9 @@ void ArchHandler_x86::appendSectionReloc
   uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom();
   bool useExternalReloc = useExternalRelocationTo(*ref.target());
   switch (ref.kindValue()) {
+  case modeCode:
+  case modeData:
+    break;
   case branch32:
     if (useExternalReloc) {
       appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,

Modified: lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFile.h
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFile.h?rev=213901&r1=213900&r2=213901&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFile.h (original)
+++ lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFile.h Thu Jul 24 18:06:56 2014
@@ -56,6 +56,7 @@
 using llvm::BumpPtrAllocator;
 using llvm::yaml::Hex64;
 using llvm::yaml::Hex32;
+using llvm::yaml::Hex16;
 using llvm::yaml::Hex8;
 using llvm::yaml::SequenceTraits;
 using llvm::MachO::HeaderFileType;
@@ -66,6 +67,7 @@ using llvm::MachO::RelocationInfoType;
 using llvm::MachO::SectionType;
 using llvm::MachO::LoadCommandType;
 using llvm::MachO::ExportSymbolKind;
+using llvm::MachO::DataRegionType;
 
 namespace lld {
 namespace mach_o {
@@ -191,10 +193,18 @@ struct Export {
   StringRef         otherName;
 };
 
+/// A normalized data-in-code entry.
+struct DataInCode {
+  Hex32           offset;
+  Hex16           length;
+  DataRegionType  kind;
+};
+
 
 /// A typedef so that YAML I/O can encode/decode mach_header.flags.
 LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags)
 
+
 ///
 struct NormalizedFile {
   NormalizedFile() : arch(MachOLinkingContext::arch_unknown),
@@ -231,12 +241,12 @@ struct NormalizedFile {
   std::vector<BindLocation>   weakBindingInfo;
   std::vector<BindLocation>   lazyBindingInfo;
   std::vector<Export>         exportInfo;
+  std::vector<DataInCode>     dataInCode;
 
   // TODO:
   // code-signature
   // split-seg-info
   // function-starts
-  // data-in-code
 
   // For any allocations in this struct which need to be owned by this struct.
   BumpPtrAllocator            ownedAllocations;

Modified: lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp?rev=213901&r1=213900&r2=213901&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp (original)
+++ lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp Thu Jul 24 18:06:56 2014
@@ -229,6 +229,8 @@ readBinary(std::unique_ptr<MemoryBuffer>
     return ec;
 
   // Walk load commands looking for segments/sections and the symbol table.
+  const data_in_code_entry *dataInCode = nullptr;
+  uint32_t dataInCodeSize = 0;
   ec = forEachLoadCommand(lcRange, lcCount, swap, is64,
                     [&] (uint32_t cmd, uint32_t size, const char* lc) -> bool {
     if (is64) {
@@ -387,21 +389,32 @@ readBinary(std::unique_ptr<MemoryBuffer>
             f->localSymbols.push_back(sout);
         }
       }
-    }
-    if (cmd == LC_ID_DYLIB) {
+    } else if (cmd == LC_ID_DYLIB) {
       const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc);
-      dylib_command tempDL;
-      if (swap) {
-        tempDL = *dl; swapStruct(tempDL); dl = &tempDL;
-      }
-
-      f->installName = lc + dl->dylib.name;
+      f->installName = lc + read32(swap, dl->dylib.name);
+    } else if (cmd == LC_DATA_IN_CODE) {
+      const linkedit_data_command *ldc =
+                            reinterpret_cast<const linkedit_data_command*>(lc);
+      dataInCode = reinterpret_cast<const data_in_code_entry*>(
+                                            start + read32(swap, ldc->dataoff));
+      dataInCodeSize = read32(swap, ldc->datasize);
     }
     return false;
   });
   if (ec)
     return ec;
 
+  if (dataInCode) {
+    // Convert on-disk data_in_code_entry array to DataInCode vector.
+    for (unsigned i=0; i < dataInCodeSize/sizeof(data_in_code_entry); ++i) {
+      DataInCode entry;
+      entry.offset = read32(swap, dataInCode[i].offset);
+      entry.length = read16(swap, dataInCode[i].length);
+      entry.kind   = (DataRegionType)read16(swap, dataInCode[i].kind);
+      f->dataInCode.push_back(entry);
+    }
+  }
+
   return std::move(f);
 }
 

Modified: lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp?rev=213901&r1=213900&r2=213901&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp (original)
+++ lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp Thu Jul 24 18:06:56 2014
@@ -74,11 +74,13 @@ private:
   void        writeRebaseInfo();
   void        writeBindingInfo();
   void        writeLazyBindingInfo();
+  void        writeDataInCodeInfo();
   void        writeLinkEditContent();
   void        buildLinkEditInfo();
   void        buildRebaseInfo();
   void        buildBindInfo();
   void        buildLazyBindInfo();
+  void        computeDataInCodeSize();
   void        computeSymbolTableSizes();
   void        buildSectionRelocations();
   void        appendSymbols(const std::vector<Symbol> &symbols,
@@ -162,6 +164,7 @@ private:
   uint32_t              _countOfLoadCommands;
   uint32_t              _endOfLoadCommands;
   uint32_t              _startOfRelocations;
+  uint32_t              _startOfDataInCode;
   uint32_t              _startOfSymbols;
   uint32_t              _startOfIndirectSymbols;
   uint32_t              _startOfSymbolStrings;
@@ -171,6 +174,7 @@ private:
   uint32_t              _symbolTableUndefinesStartIndex;
   uint32_t              _symbolStringPoolSize;
   uint32_t              _symbolTableSize;
+  uint32_t              _dataInCodeSize;
   uint32_t              _indirectSymbolTableCount;
   // Used in object file creation only
   uint32_t              _startOfSectionsContent;
@@ -227,7 +231,10 @@ MachOFileLayout::MachOFileLayout(const N
                                + file.sections.size() * sectsSize
                                + sizeof(symtab_command);
     _countOfLoadCommands = 2;
-
+    if (!_file.dataInCode.empty()) {
+      _endOfLoadCommands += sizeof(linkedit_data_command);
+      _countOfLoadCommands++;
+    }
     // Accumulate size of each section.
     _startOfSectionsContent = _endOfLoadCommands;
     _endOfSectionsContent = _startOfSectionsContent;
@@ -239,10 +246,12 @@ MachOFileLayout::MachOFileLayout(const N
     }
 
     computeSymbolTableSizes();
+    computeDataInCodeSize();
 
     // Align start of relocations.
     _startOfRelocations = pointerAlign(_endOfSectionsContent);
-    _startOfSymbols = _startOfRelocations + relocCount * 8;
+    _startOfDataInCode = _startOfRelocations + relocCount * 8;
+    _startOfSymbols = _startOfDataInCode + _dataInCodeSize;
     // Add Indirect symbol table.
     _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize;
     // Align start of symbol table and symbol strings.
@@ -273,15 +282,15 @@ MachOFileLayout::MachOFileLayout(const N
 
     // LINKEDIT of final linked images has in order:
     // rebase info, binding info, lazy binding info, weak binding info,
-    // indirect symbol table, symbol table, symbol table strings.
+    // data-in-code, symbol table, indirect symbol table, symbol table strings.
     _startOfRebaseInfo = _startOfLinkEdit;
     _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size();
     _startOfBindingInfo = _endOfRebaseInfo;
     _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size();
     _startOfLazyBindingInfo = _endOfBindingInfo;
     _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size();
-
-    _startOfSymbols = _endOfLazyBindingInfo;
+    _startOfDataInCode = _endOfLazyBindingInfo;
+    _startOfSymbols = _startOfDataInCode + _dataInCodeSize;
     _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize;
     _startOfSymbolStrings = _startOfIndirectSymbols
                   + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t));
@@ -300,6 +309,7 @@ MachOFileLayout::MachOFileLayout(const N
       << "  endOfBindingInfo=" << _endOfBindingInfo << "\n"
       << "  startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n"
       << "  endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n"
+      << "  startOfDataInCode=" << _startOfDataInCode << "\n"
       << "  startOfSymbols=" << _startOfSymbols << "\n"
       << "  startOfSymbolStrings=" << _startOfSymbolStrings << "\n"
       << "  endOfSymbolStrings=" << _endOfSymbolStrings << "\n"
@@ -620,6 +630,18 @@ std::error_code MachOFileLayout::writeLo
     st->strsize = _endOfSymbolStrings - _startOfSymbolStrings;
     if (_swap)
       swapStruct(*st);
+    lc += sizeof(symtab_command);
+    // Add LC_DATA_IN_CODE if needed.
+    if (_dataInCodeSize != 0) {
+      linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc);
+      dl->cmd      = LC_DATA_IN_CODE;
+      dl->cmdsize  = sizeof(linkedit_data_command);
+      dl->dataoff  = _startOfDataInCode;
+      dl->datasize = _dataInCodeSize;
+      if (_swap)
+        swapStruct(*dl);
+      lc += sizeof(linkedit_data_command);
+    }
   } else {
     // Final linked images have sections under segments.
     if (_is64)
@@ -804,6 +826,20 @@ void MachOFileLayout::appendSymbols(cons
   }
 }
 
+void MachOFileLayout::writeDataInCodeInfo() {
+  uint32_t offset = _startOfDataInCode;
+  for (const DataInCode &entry : _file.dataInCode) {
+    data_in_code_entry *dst = reinterpret_cast<data_in_code_entry*>(
+                                                             &_buffer[offset]);
+    dst->offset = entry.offset;
+    dst->length = entry.length;
+    dst->kind   = entry.kind;
+    if (_swap)
+      swapStruct(*dst);
+    offset += sizeof(data_in_code_entry);
+  }
+}
+
 void MachOFileLayout::writeSymbolTable() {
   // Write symbol table and symbol strings in parallel.
   uint32_t symOffset = _startOfSymbols;
@@ -860,6 +896,7 @@ void MachOFileLayout::buildLinkEditInfo(
   buildBindInfo();
   buildLazyBindInfo();
   computeSymbolTableSizes();
+  computeDataInCodeSize();
 }
 
 void MachOFileLayout::buildSectionRelocations() {
@@ -941,10 +978,14 @@ void MachOFileLayout::computeSymbolTable
   }
 }
 
+void MachOFileLayout::computeDataInCodeSize() {
+  _dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry);
+}
 
 void MachOFileLayout::writeLinkEditContent() {
   if (_file.fileType == llvm::MachO::MH_OBJECT) {
     writeRelocations();
+    writeDataInCodeInfo();
     writeSymbolTable();
   } else {
     writeRebaseInfo();

Modified: lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp?rev=213901&r1=213900&r2=213901&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp (original)
+++ lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp Thu Jul 24 18:06:56 2014
@@ -102,6 +102,7 @@ public:
   void      addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file);
   void      addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file);
   void      addSectionRelocs(const lld::File &, NormalizedFile &file);
+  void      buildDataInCodeArray(const lld::File &, NormalizedFile &file);
   void      addDependentDylibs(const lld::File &, NormalizedFile &file);
   void      copyEntryPointAddress(NormalizedFile &file);
 
@@ -899,6 +900,46 @@ void Util::addSectionRelocs(const lld::F
   }
 }
 
+void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) {
+  for (SectionInfo *si : _sectionInfos) {
+    for (const AtomInfo &info : si->atomsAndOffsets) {
+      // Atoms that contain data-in-code have "transition" references
+      // which mark a point where the embedded data starts or ends.
+      // This needs to be converted to the mach-o format which is an array
+      // of data-in-code ranges.
+      uint32_t startOffset = 0;
+      DataRegionType mode = DataRegionType(0);
+      for (const Reference *ref : *info.atom) {
+        if (ref->kindNamespace() != Reference::KindNamespace::mach_o)
+          continue;
+        if (_archHandler.isDataInCodeTransition(ref->kindValue())) {
+          DataRegionType nextMode = (DataRegionType)ref->addend();
+          if (mode != nextMode) {
+            if (mode != 0) {
+              // Found end data range, so make range entry.
+              DataInCode entry;
+              entry.offset = si->address + info.offsetInSection + startOffset;
+              entry.length = ref->offsetInAtom() - startOffset;
+              entry.kind   = mode;
+              file.dataInCode.push_back(entry);
+            }
+          }
+          mode = nextMode;
+          startOffset = ref->offsetInAtom();
+        }
+      }
+      if (mode != 0) {
+        // Function ends with data (no end transition).
+        DataInCode entry;
+        entry.offset = si->address + info.offsetInSection + startOffset;
+        entry.length = info.atom->size() - startOffset;
+        entry.kind   = mode;
+        file.dataInCode.push_back(entry);
+      }
+    }
+  }
+}
+
 void Util::addRebaseAndBindingInfo(const lld::File &atomFile,
                                                         NormalizedFile &nFile) {
   if (_context.outputMachOType() == llvm::MachO::MH_OBJECT)
@@ -992,6 +1033,7 @@ normalizedFromAtoms(const lld::File &ato
   util.addIndirectSymbols(atomFile, normFile);
   util.addRebaseAndBindingInfo(atomFile, normFile);
   util.addSectionRelocs(atomFile, normFile);
+  util.buildDataInCodeArray(atomFile, normFile);
   util.copyEntryPointAddress(normFile);
 
   return std::move(f);

Modified: lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp?rev=213901&r1=213900&r2=213901&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp (original)
+++ lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp Thu Jul 24 18:06:56 2014
@@ -426,6 +426,17 @@ std::error_code processSection(DefinedAt
   return std::error_code();
 }
 
+const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile,
+                                          uint64_t address) {
+  for (const Section &s : normalizedFile.sections) {
+    uint64_t sAddr = s.address;
+    if ((sAddr <= address) && (address < sAddr+s.content.size())) {
+      return &s;
+    }
+  }
+  return nullptr;
+}
+
 // Walks all relocations for a section in a normalized .o file and
 // creates corresponding lld::Reference objects.
 std::error_code convertRelocs(const Section &section,
@@ -441,17 +452,10 @@ std::error_code convertRelocs(const Sect
                                      "index (") + Twine(sectIndex) + ")");
     const Section *sect = nullptr;
     if (sectIndex == 0) {
-      for (const Section &s : normalizedFile.sections) {
-        uint64_t sAddr = s.address;
-        if ((sAddr <= addr) && (addr < sAddr+s.content.size())) {
-          sect = &s;
-          break;
-        }
-      }
-      if (!sect) {
+      sect = findSectionCoveringAddress(normalizedFile, addr);
+      if (!sect)
         return make_dynamic_error_code(Twine("address (" + Twine(addr)
-                                           + ") is not in any section"));
-      }
+                                       + ") is not in any section"));
     } else {
       sect = &normalizedFile.sections[sectIndex-1];
     }
@@ -612,6 +616,50 @@ normalizedObjectToAtoms(const Normalized
     handler->addAdditionalReferences(*atom);
   });
 
+  // Process mach-o data-in-code regions array. That information is encoded in
+  // atoms as References at each transition point.
+  unsigned nextIndex = 0;
+  for (const DataInCode &entry : normalizedFile.dataInCode) {
+    ++nextIndex;
+    const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset);
+    if (!s) {
+      return make_dynamic_error_code(Twine("LC_DATA_IN_CODE address ("
+                                     + Twine(entry.offset)
+                                     + ") is not in any section"));
+    }
+    uint64_t offsetInSect = entry.offset - s->address;
+    uint32_t offsetInAtom;
+    MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect,
+                                                           &offsetInAtom);
+    if (offsetInAtom + entry.length > atom->size()) {
+      return make_dynamic_error_code(Twine("LC_DATA_IN_CODE entry (offset="
+                                     + Twine(entry.offset)
+                                     + ", length="
+                                     + Twine(entry.length)
+                                     + ") crosses atom boundary."));
+    }
+    // Add reference that marks start of data-in-code.
+    atom->addReference(offsetInAtom,
+                       handler->dataInCodeTransitionStart(*atom), atom,
+                       entry.kind, handler->kindArch());
+
+    // Peek at next entry, if it starts where this one ends, skip ending ref.
+    if (nextIndex < normalizedFile.dataInCode.size()) {
+      const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex];
+      if (nextEntry.offset == (entry.offset + entry.length))
+        continue;
+    }
+
+    // If data goes to end of function, skip ending ref.
+    if ((offsetInAtom + entry.length) == atom->size())
+      continue;
+
+    // Add reference that marks end of data-in-code.
+    atom->addReference(offsetInAtom+entry.length,
+                       handler->dataInCodeTransitionEnd(*atom), atom, 0,
+                       handler->kindArch());
+  }
+
   // Sort references in each atom to their canonical order.
   for (const DefinedAtom* defAtom : file->defined()) {
     reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences();

Modified: lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp?rev=213901&r1=213900&r2=213901&view=diff
==============================================================================
--- lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp (original)
+++ lld/trunk/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp Thu Jul 24 18:06:56 2014
@@ -45,6 +45,7 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(RebaseLocat
 LLVM_YAML_IS_SEQUENCE_VECTOR(BindLocation)
 LLVM_YAML_IS_SEQUENCE_VECTOR(Export)
 LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef)
+LLVM_YAML_IS_SEQUENCE_VECTOR(DataInCode)
 
 
 // for compatibility with gcc-4.7 in C++11 mode, add extra namespace
@@ -596,6 +597,31 @@ struct MappingTraits<Export> {
   }
 };
 
+template <>
+struct ScalarEnumerationTraits<DataRegionType> {
+  static void enumeration(IO &io, DataRegionType &value) {
+    io.enumCase(value, "DICE_KIND_DATA",
+                        llvm::MachO::DICE_KIND_DATA);
+    io.enumCase(value, "DICE_KIND_JUMP_TABLE8",
+                        llvm::MachO::DICE_KIND_JUMP_TABLE8);
+    io.enumCase(value, "DICE_KIND_JUMP_TABLE16",
+                        llvm::MachO::DICE_KIND_JUMP_TABLE16);
+    io.enumCase(value, "DICE_KIND_JUMP_TABLE32",
+                        llvm::MachO::DICE_KIND_JUMP_TABLE32);
+    io.enumCase(value, "DICE_KIND_ABS_JUMP_TABLE32",
+                        llvm::MachO::DICE_KIND_ABS_JUMP_TABLE32);
+  }
+};
+
+template <>
+struct MappingTraits<DataInCode> {
+  static void mapping(IO &io, DataInCode &entry) {
+    io.mapRequired("offset",       entry.offset);
+    io.mapRequired("length",       entry.length);
+    io.mapRequired("kind",         entry.kind);
+  }
+};
+
 
 template <>
 struct MappingTraits<NormalizedFile> {
@@ -622,6 +648,7 @@ struct MappingTraits<NormalizedFile> {
     io.mapOptional("weak-bindings",    file.weakBindingInfo);
     io.mapOptional("lazy-bindings",    file.lazyBindingInfo);
     io.mapOptional("exports",          file.exportInfo);
+    io.mapOptional("dataInCode",       file.dataInCode);
   }
   static StringRef validate(IO &io, NormalizedFile &file) {
     return StringRef();

Added: lld/trunk/test/mach-o/parse-data-in-code-armv7.yaml
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/mach-o/parse-data-in-code-armv7.yaml?rev=213901&view=auto
==============================================================================
--- lld/trunk/test/mach-o/parse-data-in-code-armv7.yaml (added)
+++ lld/trunk/test/mach-o/parse-data-in-code-armv7.yaml Thu Jul 24 18:06:56 2014
@@ -0,0 +1,151 @@
+# RUN: lld -flavor darwin -arch armv7 -r -print_atoms %s -o %t  | FileCheck %s \
+# RUN: && lld -flavor darwin -arch armv7 -r -print_atoms %t -o %t2  | FileCheck %s
+#
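+# Test parsing LC_DATA_IN_CODE: each dataInCode entry must show up as a
+# modeData reference (addend encodes the kind) on the containing atom, with
+# a modeThumbCode/modeArmCode reference where instructions resume.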
+
+--- !mach-o
+arch:            armv7
+file-type:       MH_OBJECT
+flags:           [ MH_SUBSECTIONS_VIA_SYMBOLS ]
+sections:
+  - segment:         __TEXT
+    section:         __text
+    type:            S_REGULAR
+    attributes:      [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ]
+    alignment:       2
+    address:         0x0000000000000000
+    content:         [ 0x00, 0xBF, 0x00, 0xBF, 0x00, 0x00, 0x00, 0x00, 
+                       0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 
+                       0x03, 0x00, 0x00, 0x00, 0x00, 0xBF, 0x00, 0xBF, 
+                       0x00, 0xF0, 0x20, 0xE3, 0x0A, 0x00, 0x00, 0x00, 
+                       0x0B, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, 
+                       0x0D, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x20, 0xE3 ]
+local-symbols:
+  - name:            _foo_thumb
+    type:            N_SECT
+    sect:            1
+    desc:            [ N_ARM_THUMB_DEF ]
+    value:           0x0000000000000000
+  - name:            _foo_arm
+    type:            N_SECT
+    sect:            1
+    value:           0x0000000000000018
+dataInCode:
+  - offset:          0x00000004
+    length:          0x0004
+    kind:            DICE_KIND_DATA
+  - offset:          0x00000008
+    length:          0x0004
+    kind:            DICE_KIND_JUMP_TABLE32
+  - offset:          0x0000000C
+    length:          0x0004
+    kind:            DICE_KIND_JUMP_TABLE16
+  - offset:          0x00000010
+    length:          0x0004
+    kind:            DICE_KIND_JUMP_TABLE8
+  - offset:          0x0000001C
+    length:          0x0004
+    kind:            DICE_KIND_DATA
+  - offset:          0x00000020
+    length:          0x0004
+    kind:            DICE_KIND_JUMP_TABLE32
+  - offset:          0x00000024
+    length:          0x0004
+    kind:            DICE_KIND_JUMP_TABLE16
+  - offset:          0x00000028
+    length:          0x0004
+    kind:            DICE_KIND_JUMP_TABLE8
+...
+
+
+
+# CHECK: defined-atoms:
+# CHECK:   - name:            _foo_thumb
+# CHECK:     references:
+# CHECK:       - kind:            modeThumbCode
+# CHECK:         offset:          0
+# CHECK:       - kind:            modeData
+# CHECK:         offset:          4
+# CHECK:         addend:          1
+# CHECK:       - kind:            modeData
+# CHECK:         offset:          8
+# CHECK:         addend:          4
+# CHECK:       - kind:            modeData
+# CHECK:         offset:          12
+# CHECK:         addend:          3
+# CHECK:       - kind:            modeData
+# CHECK:         offset:          16
+# CHECK:         addend:          2
+# CHECK:       - kind:            modeThumbCode
+# CHECK:         offset:          20
+# CHECK:   - name:            _foo_arm
+# CHECK:     references:
+# CHECK:       - kind:            modeData
+# CHECK:         offset:          4
+# CHECK:         addend:          1
+# CHECK:       - kind:            modeData
+# CHECK:         offset:          8
+# CHECK:         addend:          4
+# CHECK:       - kind:            modeData
+# CHECK:         offset:          12
+# CHECK:         addend:          3
+# CHECK:       - kind:            modeData
+# CHECK:         offset:          16
+# CHECK:         addend:          2
+# CHECK:       - kind:            modeArmCode
+# CHECK:         offset:          20
+
+
+
+#	.code	16
+#  .thumb_func	_foo_thumb
+#_foo_thumb:
+# nop
+# nop
+#
+#	.data_region
+#  .long 0
+#	.end_data_region
+#
+#	.data_region jt32
+#  .long 1
+#	.end_data_region
+#
+#	.data_region jt16
+#  .long 2
+#	.end_data_region
+#
+#	.data_region jt8
+#  .long 3
+#	.end_data_region
+#
+#  nop
+#  nop
+#
+#
+#
+#	.code	32
+#  .align 2  
+#_foo_arm:
+#  nop
+#
+#	.data_region
+#  .long 10
+#	.end_data_region
+#
+#	.data_region jt32
+#  .long 11
+#	.end_data_region
+#
+#	.data_region jt16
+#  .long 12
+#	.end_data_region
+#
+#	.data_region jt8
+#  .long 13
+#	.end_data_region
+#
+#  nop
+#

Added: lld/trunk/test/mach-o/parse-data-in-code-x86.yaml
URL: http://llvm.org/viewvc/llvm-project/lld/trunk/test/mach-o/parse-data-in-code-x86.yaml?rev=213901&view=auto
==============================================================================
--- lld/trunk/test/mach-o/parse-data-in-code-x86.yaml (added)
+++ lld/trunk/test/mach-o/parse-data-in-code-x86.yaml Thu Jul 24 18:06:56 2014
@@ -0,0 +1,77 @@
+# RUN: lld -flavor darwin -arch i386 -r -print_atoms %s -o %t  | FileCheck %s \
+# RUN: && lld -flavor darwin -arch i386 -r -print_atoms %t -o %t2  | FileCheck %s
+#
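+# Test parsing LC_DATA_IN_CODE: each dataInCode entry must show up as a
+# modeData reference (addend encodes the kind) at an atom-relative offset,
+# followed by modeCode where instructions resume within the same atom.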
+
+--- !mach-o
+arch:            x86
+file-type:       MH_OBJECT
+flags:           [ MH_SUBSECTIONS_VIA_SYMBOLS ]
+sections:
+  - segment:         __TEXT
+    section:         __text
+    type:            S_REGULAR
+    attributes:      [ S_ATTR_PURE_INSTRUCTIONS, S_ATTR_SOME_INSTRUCTIONS ]
+    address:         0x0000000000000000
+    content:         [ 0x90, 0x90, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00,
+                       0x00, 0x00, 0x90, 0x90, 0x90, 0x90, 0x03, 0x00,
+                       0x00, 0x00 ]
+local-symbols:
+  - name:            _func1
+    type:            N_SECT
+    sect:            1
+    value:           0x0000000000000000
+  - name:            _func2
+    type:            N_SECT
+    sect:            1
+    value:           0x000000000000000B
+dataInCode:
+  - offset:          0x00000002
+    length:          0x0008
+    kind:            DICE_KIND_JUMP_TABLE32
+  - offset:          0x0000000E
+    length:          0x0004
+    kind:            DICE_KIND_JUMP_TABLE32
+...
+
+
+
+# CHECK: defined-atoms:
+# CHECK:   - name:            _func1
+# CHECK:     references:
+# CHECK:       - kind:            modeData
+# CHECK:         offset:          2
+# CHECK:         addend:          4
+# CHECK:       - kind:            modeCode
+# CHECK:         offset:          10
+# CHECK:   - name:            _func2
+# CHECK:     references:
+# CHECK:       - kind:            modeData
+# CHECK:         offset:          3
+# CHECK:         addend:          4
+# CHECK-NOT:   - kind:            modeData
+
+
+
+
+#
+#_func1:
+#  nop
+#  nop
+#  .data_region jt32
+#  .long 1
+#  .long 2
+#  .end_data_region
+#  nop
+#
+#
+# _func2:
+#  nop
+#  nop
+#  nop
+#  .data_region jt32
+#  .long 3
+#  .end_data_region
+#





More information about the llvm-commits mailing list