[lld] 928394d - [lld][MachO] Add support for LC_DATA_IN_CODE

Alexander Shaposhnikov via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 14 19:22:19 PDT 2021


Author: Alexander Shaposhnikov
Date: 2021-06-14T19:21:59-07:00
New Revision: 928394d10918c97880ef36e4e9853888b0d55207

URL: https://github.com/llvm/llvm-project/commit/928394d10918c97880ef36e4e9853888b0d55207
DIFF: https://github.com/llvm/llvm-project/commit/928394d10918c97880ef36e4e9853888b0d55207.diff

LOG: [lld][MachO] Add support for LC_DATA_IN_CODE

Add first bits for emitting LC_DATA_IN_CODE.

Test plan: make check-lld-macho

Differential revision: https://reviews.llvm.org/D103006

Added: 
    lld/test/MachO/data-in-code.s

Modified: 
    lld/MachO/InputFiles.cpp
    lld/MachO/InputFiles.h
    lld/MachO/InputSection.h
    lld/MachO/SyntheticSections.cpp
    lld/MachO/SyntheticSections.h
    lld/MachO/Writer.cpp
    lld/test/MachO/headerpad.s
    lld/test/MachO/local-got.s

Removed: 
    


################################################################################
diff  --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 3ee19598661a2..6b20d30fca3e2 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -735,6 +735,7 @@ template <class LP> void ObjFile::parse() {
       parseRelocations(sectionHeaders, sectionHeaders[i], subsections[i]);
 
   parseDebugInfo();
+  parseDataInCode();
 }
 
 void ObjFile::parseDebugInfo() {
@@ -760,6 +761,21 @@ void ObjFile::parseDebugInfo() {
   compileUnit = it->get();
 }
 
+// Looks for an LC_DATA_IN_CODE load command in this object's buffer and, when
+// one is present, points dataInCodeEntries at the table it describes. When
+// the command is absent, dataInCodeEntries is left as it was.
+void ObjFile::parseDataInCode() {
+  const auto *fileStart =
+      reinterpret_cast<const uint8_t *>(mb.getBufferStart());
+  if (const load_command *lc = findCommand(fileStart, LC_DATA_IN_CODE)) {
+    const auto *dataCmd = reinterpret_cast<const linkedit_data_command *>(lc);
+    // The table starts dataoff bytes into the file and spans datasize bytes.
+    const auto *firstEntry = reinterpret_cast<const data_in_code_entry *>(
+        fileStart + dataCmd->dataoff);
+    const size_t numEntries = dataCmd->datasize / sizeof(data_in_code_entry);
+    dataInCodeEntries = {firstEntry, numEntries};
+    // The entries are expected to be ordered by offset; this is only verified
+    // in builds with assertions enabled.
+    assert(is_sorted(dataInCodeEntries, [](const data_in_code_entry &a,
+                                           const data_in_code_entry &b) {
+      return a.offset < b.offset;
+    }));
+  }
+}
+
 // The path can point to either a dylib or a .tbd file.
 static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
   Optional<MemoryBufferRef> mbref = readFile(path);

diff  --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index d3601ffd5a2ca..fa34dbeb6fc95 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -104,6 +104,7 @@ class ObjFile final : public InputFile {
   llvm::DWARFUnit *compileUnit = nullptr;
   const uint32_t modTime;
   std::vector<ConcatInputSection *> debugSections;
+  ArrayRef<llvm::MachO::data_in_code_entry> dataInCodeEntries;
 
 private:
   template <class LP> void parse();
@@ -118,6 +119,7 @@ class ObjFile final : public InputFile {
   void parseRelocations(ArrayRef<Section> sectionHeaders, const Section &,
                         SubsectionMap &);
   void parseDebugInfo();
+  void parseDataInCode();
 };
 
 // command-line -sectcreate file

diff  --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index acf8c9d4a68d9..dabf30a5e1135 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -239,6 +239,7 @@ constexpr const char debugInfo[] = "__debug_info";
 constexpr const char debugStr[] = "__debug_str";
 constexpr const char ehFrame[] = "__eh_frame";
 constexpr const char export_[] = "__export";
+constexpr const char dataInCode[] = "__data_in_code";
 constexpr const char functionStarts[] = "__func_starts";
 constexpr const char got[] = "__got";
 constexpr const char header[] = "__mach_header";

diff  --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 9317d2e99072c..458bbaa289f44 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -574,6 +574,68 @@ void ExportSection::finalizeContents() {
 
 void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); }
 
+// Creates the section under the name section_names::dataInCode
+// ("__data_in_code") inside the segment_names::linkEdit segment.
+DataInCodeSection::DataInCodeSection()
+    : LinkEditSection(segment_names::linkEdit, section_names::dataInCode) {}
+
+// Gathers the 'data in code' entries of every ObjFile in inputFiles and
+// returns them with their offsets recomputed for the output file.
+//
+// LP supplies the segment_command and section types as well as the segment
+// load-command type (LP::segmentLCType) used to find the section headers.
+// An entry is kept only if it lies entirely inside a code subsection that is
+// not omitted from the output.
+template <class LP>
+static std::vector<MachO::data_in_code_entry> collectDataInCodeEntries() {
+  using SegmentCommand = typename LP::segment_command;
+  using Section = typename LP::section;
+
+  std::vector<MachO::data_in_code_entry> dataInCodeEntries;
+  for (const InputFile *inputFile : inputFiles) {
+    // Only object files carry the entries collected here.
+    if (!isa<ObjFile>(inputFile))
+      continue;
+    const ObjFile *objFile = cast<ObjFile>(inputFile);
+    const auto *c = reinterpret_cast<const SegmentCommand *>(
+        findCommand(objFile->mb.getBufferStart(), LP::segmentLCType));
+    if (!c)
+      continue;
+    // The nsects section headers are read from directly behind the segment
+    // command.
+    ArrayRef<Section> sections{reinterpret_cast<const Section *>(c + 1),
+                               c->nsects};
+
+    ArrayRef<MachO::data_in_code_entry> entries = objFile->dataInCodeEntries;
+    if (entries.empty())
+      continue;
+    // For each code subsection find 'data in code' entries residing in it.
+    // Compute the new offset values as
+    // <offset within subsection> + <subsection address> - <__TEXT address>.
+    for (size_t i = 0, n = sections.size(); i < n; ++i) {
+      // objFile->subsections is indexed in parallel with the section headers.
+      const SubsectionMap &subsecMap = objFile->subsections[i];
+      for (const SubsectionEntry &subsecEntry : subsecMap) {
+        const InputSection *isec = subsecEntry.isec;
+        if (!isCodeSection(isec))
+          continue;
+        if (cast<ConcatInputSection>(isec)->shouldOmitFromOutput())
+          continue;
+        // [beginAddr, endAddr) is the range this subsection covers in the
+        // input file.
+        const uint64_t beginAddr = sections[i].addr + subsecEntry.offset;
+        // Binary search for the first entry at or past beginAddr; this relies
+        // on the entries being ordered by offset.
+        auto it = llvm::lower_bound(
+            entries, beginAddr,
+            [](const MachO::data_in_code_entry &entry, uint64_t addr) {
+              return entry.offset < addr;
+            });
+        const uint64_t endAddr = beginAddr + isec->getFileSize();
+        // Take entries for as long as they end inside this subsection; the
+        // scan stops at the first entry that reaches past endAddr.
+        for (const auto end = entries.end();
+             it != end && it->offset + it->length <= endAddr; ++it)
+          dataInCodeEntries.push_back(
+              {static_cast<uint32_t>(isec->getVA(it->offset - beginAddr) -
+                                     in.header->addr),
+               it->length, it->kind});
+      }
+    }
+  }
+  return dataInCodeEntries;
+}
+
+// Builds the entry table, using the LP64 collector when the target word size
+// is 8 bytes and the ILP32 collector otherwise.
+void DataInCodeSection::finalizeContents() {
+  if (target->wordSize == 8)
+    entries = collectDataInCodeEntries<LP64>();
+  else
+    entries = collectDataInCodeEntries<ILP32>();
+}
+
+// Copies the collected entries verbatim into the output buffer at buf.
+void DataInCodeSection::writeTo(uint8_t *buf) const {
+  // With no entries, entries.data() may be a null pointer, and passing a null
+  // pointer to memcpy is undefined behavior even when the size is zero.
+  if (!entries.empty())
+    memcpy(buf, entries.data(), getRawSize());
+}
+
 FunctionStartsSection::FunctionStartsSection()
     : LinkEditSection(segment_names::linkEdit, section_names::functionStarts) {}
 

diff  --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 1e9bba72932ff..36141772697b2 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -371,6 +371,21 @@ class ExportSection final : public LinkEditSection {
   size_t size = 0;
 };
 
+// Stores 'data in code' entries that describe the locations of
+// data regions inside code sections.
+class DataInCodeSection final : public LinkEditSection {
+public:
+  DataInCodeSection();
+  // Fills `entries`; must run before the size is queried or the section is
+  // written, since both read `entries`.
+  void finalizeContents() override;
+  // Size in bytes of the table: one data_in_code_entry per collected entry.
+  uint64_t getRawSize() const override {
+    return sizeof(llvm::MachO::data_in_code_entry) * entries.size();
+  }
+  // Writes the entries to buf.
+  void writeTo(uint8_t *buf) const override;
+
+private:
+  // Entries to emit, populated by finalizeContents().
+  std::vector<llvm::MachO::data_in_code_entry> entries;
+};
+
 // Stores ULEB128 delta encoded addresses of functions.
 class FunctionStartsSection final : public LinkEditSection {
 public:

diff  --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index c620d847441ea..b55795da831ba 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -71,6 +71,7 @@ class Writer {
   SymtabSection *symtabSection = nullptr;
   IndirectSymtabSection *indirectSymtabSection = nullptr;
   CodeSignatureSection *codeSignatureSection = nullptr;
+  DataInCodeSection *dataInCodeSection = nullptr;
   FunctionStartsSection *functionStartsSection = nullptr;
 
   LCUuid *uuidCommand = nullptr;
@@ -142,6 +143,25 @@ class LCFunctionStarts final : public LoadCommand {
   FunctionStartsSection *functionStartsSection;
 };
 
+// LC_DATA_IN_CODE load command: records where the DataInCodeSection contents
+// are located in the output file and how many bytes they occupy.
+class LCDataInCode final : public LoadCommand {
+public:
+  explicit LCDataInCode(DataInCodeSection *dataInCodeSection)
+      : dataInCodeSection(dataInCodeSection) {}
+
+  // The command is exactly one linkedit_data_command.
+  uint32_t getSize() const override { return sizeof(linkedit_data_command); }
+
+  // Fills in the command at buf: its type and size, followed by the file
+  // offset and file size of the section.
+  void writeTo(uint8_t *buf) const override {
+    auto *c = reinterpret_cast<linkedit_data_command *>(buf);
+    c->cmd = LC_DATA_IN_CODE;
+    c->cmdsize = getSize();
+    c->dataoff = dataInCodeSection->fileOff;
+    c->datasize = dataInCodeSection->getFileSize();
+  }
+
+private:
+  // Section described by this command; dereferenced in writeTo().
+  DataInCodeSection *dataInCodeSection;
+};
+
 class LCDysymtab final : public LoadCommand {
 public:
   LCDysymtab(SymtabSection *symtabSection,
@@ -646,6 +666,7 @@ template <class LP> void Writer::createLoadCommands() {
       make<LCDysymtab>(symtabSection, indirectSymtabSection));
   if (functionStartsSection)
     in.header->addLoadCommand(make<LCFunctionStarts>(functionStartsSection));
+  in.header->addLoadCommand(make<LCDataInCode>(dataInCodeSection));
   if (config->emitEncryptionInfo)
     in.header->addLoadCommand(make<LCEncryptionInfo<LP>>());
   for (StringRef path : config->runtimePaths)
@@ -844,6 +865,7 @@ template <class LP> void Writer::createOutputSections() {
   indirectSymtabSection = make<IndirectSymtabSection>();
   if (config->adhocCodesign)
     codeSignatureSection = make<CodeSignatureSection>();
+  dataInCodeSection = make<DataInCodeSection>();
   if (config->emitFunctionStarts)
     functionStartsSection = make<FunctionStartsSection>();
   if (config->emitBitcodeBundle)
@@ -944,8 +966,15 @@ void Writer::finalizeLinkEditSegment() {
   TimeTraceScope timeScope("Finalize __LINKEDIT segment");
   // Fill __LINKEDIT contents.
   std::vector<LinkEditSection *> linkEditSections{
-      in.rebase,  in.binding,    in.weakBinding,        in.lazyBinding,
-      in.exports, symtabSection, indirectSymtabSection, functionStartsSection,
+      in.rebase,
+      in.binding,
+      in.weakBinding,
+      in.lazyBinding,
+      in.exports,
+      symtabSection,
+      indirectSymtabSection,
+      dataInCodeSection,
+      functionStartsSection,
   };
   parallelForEach(linkEditSections, [](LinkEditSection *osec) {
     if (osec)

diff  --git a/lld/test/MachO/data-in-code.s b/lld/test/MachO/data-in-code.s
new file mode 100644
index 0000000000000..6e06fb6bbf34a
--- /dev/null
+++ b/lld/test/MachO/data-in-code.s
@@ -0,0 +1,64 @@
+# REQUIRES: x86
+
+## Links two objects that each contain one jump-table data region in __text
+## and inspects the LC_DATA_IN_CODE command and table of the linked output.
+# RUN: rm -rf %t; split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/bar.s -o %t/bar.o
+# RUN: %lld %t/foo.o %t/bar.o -o %t/main.exe
+# RUN: llvm-objdump --private-headers %t/main.exe > %t/objdump
+# RUN: llvm-objdump --macho --data-in-code %t/main.exe >> %t/objdump
+# RUN: FileCheck %s < %t/objdump
+
+
+## Capture the file offset of __text as TEXT; the table offsets below are
+## expressed relative to it.
+# CHECK-LABEL:  sectname __text
+# CHECK-NEXT:   segname __TEXT
+# CHECK-NEXT:   addr
+# CHECK-NEXT:   size
+# CHECK-NEXT:   offset [[#%,TEXT:]]
+
+## The command itself is 16 bytes, and so is the two-entry table it refers to.
+# CHECK-LABEL:  cmd LC_DATA_IN_CODE
+# CHECK-NEXT:   cmdsize 16
+# CHECK-NEXT:   dataoff
+# CHECK-NEXT:   datasize 16
+
+## The instructions of _main occupy 28 bytes, so the 24-byte region from foo.o
+## starts at TEXT + 28. The 16-byte aligned contents of bar.o start at
+## TEXT + 64; its 12-byte region follows the one-byte retq after 4-byte
+## alignment, at TEXT + 68.
+# CHECK-LABEL:  Data in code table (2 entries)
+# CHECK-NEXT:   offset length kind
+# CHECK-NEXT:   [[#%x,TEXT + 28]] 24 JUMP_TABLE32
+# CHECK-NEXT:   [[#%x,TEXT + 68]] 12 JUMP_TABLE32
+
+#--- foo.s
+## Defines _main, which calls _bar, followed by a jump-table data region in
+## the same section.
+.text
+.globl _main
+.p2align 4, 0x90
+_main:
+pushq	%rbp
+movq	%rsp, %rbp
+subq	$16, %rsp
+movl	$0, -4(%rbp)
+movb	$0, %al
+callq	_bar
+addq	$16, %rsp
+popq	%rbp
+retq
+.p2align 2, 0x90
+## Six 32-bit words marked as a jt32 data region (24 bytes).
+.data_region jt32
+.long 0
+.long 0
+.long 0
+.long 0
+.long 0
+.long 0
+.end_data_region
+
+#--- bar.s
+## Defines _bar as a lone retq, followed by a jump-table data region in the
+## same section.
+.text
+.globl _bar
+.p2align 4
+_bar:
+retq
+.p2align 2, 0x90
+## Three 32-bit words marked as a jt32 data region (12 bytes).
+.data_region jt32
+.long 0
+.long 0
+.long 0
+.end_data_region

diff  --git a/lld/test/MachO/headerpad.s b/lld/test/MachO/headerpad.s
index 3f7f201ba7d8e..fa59a9f79d8a7 100644
--- a/lld/test/MachO/headerpad.s
+++ b/lld/test/MachO/headerpad.s
@@ -72,7 +72,7 @@
 # PADMAX-NEXT: segname __TEXT
 # PADMAX-NEXT: addr
 # PADMAX-NEXT: size
-# PADMAX-NEXT: offset [[#%u, CMDSIZE + 0x20 + mul(0x400, N - 8)]]
+# PADMAX-NEXT: offset [[#%u, CMDSIZE + 0x20 + mul(0x400, N - 9)]]
 
 ################ All 3 kinds of LCDylib swamped by a larger override
 # RUN: %lld -o %t/libnull.dylib %t/null.o -dylib \

diff  --git a/lld/test/MachO/local-got.s b/lld/test/MachO/local-got.s
index f79fef40a7eef..1d415e35e91af 100644
--- a/lld/test/MachO/local-got.s
+++ b/lld/test/MachO/local-got.s
@@ -15,12 +15,12 @@
 ## address offset and the contents at that address very similarly, so am using
 ## --match-full-lines to make sure we match on the right thing.
 # CHECK:      Contents of section __TEXT,__cstring:
-# CHECK-NEXT: 100000434 {{.*}}
+# CHECK-NEXT: 100000444 {{.*}}
 
 ## 1st 8 bytes refer to the start of __cstring + 0xe, 2nd 8 bytes refer to the
 ## start of __cstring
 # CHECK:      Contents of section __DATA_CONST,__got:
-# CHECK-NEXT: [[#%X,ADDR:]]  42040000 01000000 34040000 01000000 {{.*}}
+# CHECK-NEXT: [[#%X,ADDR:]]  52040000 01000000 44040000 01000000 {{.*}}
 # CHECK-NEXT: [[#ADDR + 16]] 00000000 00000000 {{.*}}
 
 ## Check that the rebase table is empty.


        


More information about the llvm-commits mailing list