[lld] 928394d - [lld][MachO] Add support for LC_DATA_IN_CODE
Alexander Shaposhnikov via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 14 19:22:19 PDT 2021
Author: Alexander Shaposhnikov
Date: 2021-06-14T19:21:59-07:00
New Revision: 928394d10918c97880ef36e4e9853888b0d55207
URL: https://github.com/llvm/llvm-project/commit/928394d10918c97880ef36e4e9853888b0d55207
DIFF: https://github.com/llvm/llvm-project/commit/928394d10918c97880ef36e4e9853888b0d55207.diff
LOG: [lld][MachO] Add support for LC_DATA_IN_CODE
Add first bits for emitting LC_DATA_IN_CODE.
Test plan: make check-lld-macho
Differential revision: https://reviews.llvm.org/D103006
Added:
lld/test/MachO/data-in-code.s
Modified:
lld/MachO/InputFiles.cpp
lld/MachO/InputFiles.h
lld/MachO/InputSection.h
lld/MachO/SyntheticSections.cpp
lld/MachO/SyntheticSections.h
lld/MachO/Writer.cpp
lld/test/MachO/headerpad.s
lld/test/MachO/local-got.s
Removed:
################################################################################
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 3ee19598661a2..6b20d30fca3e2 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -735,6 +735,7 @@ template <class LP> void ObjFile::parse() {
parseRelocations(sectionHeaders, sectionHeaders[i], subsections[i]);
parseDebugInfo();
+ parseDataInCode();
}
void ObjFile::parseDebugInfo() {
@@ -760,6 +761,21 @@ void ObjFile::parseDebugInfo() {
compileUnit = it->get();
}
+void ObjFile::parseDataInCode() {
+ const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart());
+ const load_command *cmd = findCommand(buf, LC_DATA_IN_CODE);
+ if (!cmd)
+ return;
+ const auto *c = reinterpret_cast<const linkedit_data_command *>(cmd);
+ dataInCodeEntries = {
+ reinterpret_cast<const data_in_code_entry *>(buf + c->dataoff),
+ c->datasize / sizeof(data_in_code_entry)};
+ assert(is_sorted(dataInCodeEntries, [](const data_in_code_entry &lhs,
+ const data_in_code_entry &rhs) {
+ return lhs.offset < rhs.offset;
+ }));
+}
+
// The path can point to either a dylib or a .tbd file.
static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) {
Optional<MemoryBufferRef> mbref = readFile(path);
diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h
index d3601ffd5a2ca..fa34dbeb6fc95 100644
--- a/lld/MachO/InputFiles.h
+++ b/lld/MachO/InputFiles.h
@@ -104,6 +104,7 @@ class ObjFile final : public InputFile {
llvm::DWARFUnit *compileUnit = nullptr;
const uint32_t modTime;
std::vector<ConcatInputSection *> debugSections;
+ ArrayRef<llvm::MachO::data_in_code_entry> dataInCodeEntries;
private:
template <class LP> void parse();
@@ -118,6 +119,7 @@ class ObjFile final : public InputFile {
void parseRelocations(ArrayRef<Section> sectionHeaders, const Section &,
SubsectionMap &);
void parseDebugInfo();
+ void parseDataInCode();
};
// command-line -sectcreate file
diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h
index acf8c9d4a68d9..dabf30a5e1135 100644
--- a/lld/MachO/InputSection.h
+++ b/lld/MachO/InputSection.h
@@ -239,6 +239,7 @@ constexpr const char debugInfo[] = "__debug_info";
constexpr const char debugStr[] = "__debug_str";
constexpr const char ehFrame[] = "__eh_frame";
constexpr const char export_[] = "__export";
+constexpr const char dataInCode[] = "__data_in_code";
constexpr const char functionStarts[] = "__func_starts";
constexpr const char got[] = "__got";
constexpr const char header[] = "__mach_header";
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 9317d2e99072c..458bbaa289f44 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -574,6 +574,68 @@ void ExportSection::finalizeContents() {
void ExportSection::writeTo(uint8_t *buf) const { trieBuilder.writeTo(buf); }
+DataInCodeSection::DataInCodeSection()
+ : LinkEditSection(segment_names::linkEdit, section_names::dataInCode) {}
+
+template <class LP>
+static std::vector<MachO::data_in_code_entry> collectDataInCodeEntries() {
+ using SegmentCommand = typename LP::segment_command;
+ using Section = typename LP::section;
+
+ std::vector<MachO::data_in_code_entry> dataInCodeEntries;
+ for (const InputFile *inputFile : inputFiles) {
+ if (!isa<ObjFile>(inputFile))
+ continue;
+ const ObjFile *objFile = cast<ObjFile>(inputFile);
+ const auto *c = reinterpret_cast<const SegmentCommand *>(
+ findCommand(objFile->mb.getBufferStart(), LP::segmentLCType));
+ if (!c)
+ continue;
+ ArrayRef<Section> sections{reinterpret_cast<const Section *>(c + 1),
+ c->nsects};
+
+ ArrayRef<MachO::data_in_code_entry> entries = objFile->dataInCodeEntries;
+ if (entries.empty())
+ continue;
+ // For each code subsection find 'data in code' entries residing in it.
+ // Compute the new offset values as
+ // <offset within subsection> + <subsection address> - <__TEXT address>.
+ for (size_t i = 0, n = sections.size(); i < n; ++i) {
+ const SubsectionMap &subsecMap = objFile->subsections[i];
+ for (const SubsectionEntry &subsecEntry : subsecMap) {
+ const InputSection *isec = subsecEntry.isec;
+ if (!isCodeSection(isec))
+ continue;
+ if (cast<ConcatInputSection>(isec)->shouldOmitFromOutput())
+ continue;
+ const uint64_t beginAddr = sections[i].addr + subsecEntry.offset;
+ auto it = llvm::lower_bound(
+ entries, beginAddr,
+ [](const MachO::data_in_code_entry &entry, uint64_t addr) {
+ return entry.offset < addr;
+ });
+ const uint64_t endAddr = beginAddr + isec->getFileSize();
+ for (const auto end = entries.end();
+ it != end && it->offset + it->length <= endAddr; ++it)
+ dataInCodeEntries.push_back(
+ {static_cast<uint32_t>(isec->getVA(it->offset - beginAddr) -
+ in.header->addr),
+ it->length, it->kind});
+ }
+ }
+ }
+ return dataInCodeEntries;
+}
+
+void DataInCodeSection::finalizeContents() {
+ entries = target->wordSize == 8 ? collectDataInCodeEntries<LP64>()
+ : collectDataInCodeEntries<ILP32>();
+}
+
+void DataInCodeSection::writeTo(uint8_t *buf) const {
+ memcpy(buf, entries.data(), getRawSize());
+}
+
FunctionStartsSection::FunctionStartsSection()
: LinkEditSection(segment_names::linkEdit, section_names::functionStarts) {}
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h
index 1e9bba72932ff..36141772697b2 100644
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -371,6 +371,21 @@ class ExportSection final : public LinkEditSection {
size_t size = 0;
};
+// Stores 'data in code' entries that describe the locations of
+// data regions inside code sections.
+class DataInCodeSection final : public LinkEditSection {
+public:
+ DataInCodeSection();
+ void finalizeContents() override;
+ uint64_t getRawSize() const override {
+ return sizeof(llvm::MachO::data_in_code_entry) * entries.size();
+ }
+ void writeTo(uint8_t *buf) const override;
+
+private:
+ std::vector<llvm::MachO::data_in_code_entry> entries;
+};
+
// Stores ULEB128 delta encoded addresses of functions.
class FunctionStartsSection final : public LinkEditSection {
public:
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index c620d847441ea..b55795da831ba 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -71,6 +71,7 @@ class Writer {
SymtabSection *symtabSection = nullptr;
IndirectSymtabSection *indirectSymtabSection = nullptr;
CodeSignatureSection *codeSignatureSection = nullptr;
+ DataInCodeSection *dataInCodeSection = nullptr;
FunctionStartsSection *functionStartsSection = nullptr;
LCUuid *uuidCommand = nullptr;
@@ -142,6 +143,25 @@ class LCFunctionStarts final : public LoadCommand {
FunctionStartsSection *functionStartsSection;
};
+class LCDataInCode final : public LoadCommand {
+public:
+ explicit LCDataInCode(DataInCodeSection *dataInCodeSection)
+ : dataInCodeSection(dataInCodeSection) {}
+
+ uint32_t getSize() const override { return sizeof(linkedit_data_command); }
+
+ void writeTo(uint8_t *buf) const override {
+ auto *c = reinterpret_cast<linkedit_data_command *>(buf);
+ c->cmd = LC_DATA_IN_CODE;
+ c->cmdsize = getSize();
+ c->dataoff = dataInCodeSection->fileOff;
+ c->datasize = dataInCodeSection->getFileSize();
+ }
+
+private:
+ DataInCodeSection *dataInCodeSection;
+};
+
class LCDysymtab final : public LoadCommand {
public:
LCDysymtab(SymtabSection *symtabSection,
@@ -646,6 +666,7 @@ template <class LP> void Writer::createLoadCommands() {
make<LCDysymtab>(symtabSection, indirectSymtabSection));
if (functionStartsSection)
in.header->addLoadCommand(make<LCFunctionStarts>(functionStartsSection));
+ in.header->addLoadCommand(make<LCDataInCode>(dataInCodeSection));
if (config->emitEncryptionInfo)
in.header->addLoadCommand(make<LCEncryptionInfo<LP>>());
for (StringRef path : config->runtimePaths)
@@ -844,6 +865,7 @@ template <class LP> void Writer::createOutputSections() {
indirectSymtabSection = make<IndirectSymtabSection>();
if (config->adhocCodesign)
codeSignatureSection = make<CodeSignatureSection>();
+ dataInCodeSection = make<DataInCodeSection>();
if (config->emitFunctionStarts)
functionStartsSection = make<FunctionStartsSection>();
if (config->emitBitcodeBundle)
@@ -944,8 +966,15 @@ void Writer::finalizeLinkEditSegment() {
TimeTraceScope timeScope("Finalize __LINKEDIT segment");
// Fill __LINKEDIT contents.
std::vector<LinkEditSection *> linkEditSections{
- in.rebase, in.binding, in.weakBinding, in.lazyBinding,
- in.exports, symtabSection, indirectSymtabSection, functionStartsSection,
+ in.rebase,
+ in.binding,
+ in.weakBinding,
+ in.lazyBinding,
+ in.exports,
+ symtabSection,
+ indirectSymtabSection,
+ dataInCodeSection,
+ functionStartsSection,
};
parallelForEach(linkEditSections, [](LinkEditSection *osec) {
if (osec)
diff --git a/lld/test/MachO/data-in-code.s b/lld/test/MachO/data-in-code.s
new file mode 100644
index 0000000000000..6e06fb6bbf34a
--- /dev/null
+++ b/lld/test/MachO/data-in-code.s
@@ -0,0 +1,64 @@
+# REQUIRES: x86
+
+# RUN: rm -rf %t; split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/bar.s -o %t/bar.o
+# RUN: %lld %t/foo.o %t/bar.o -o %t/main.exe
+# RUN: llvm-objdump --private-headers %t/main.exe > %t/objdump
+# RUN: llvm-objdump --macho --data-in-code %t/main.exe >> %t/objdump
+# RUN: FileCheck %s < %t/objdump
+
+
+# CHECK-LABEL: sectname __text
+# CHECK-NEXT: segname __TEXT
+# CHECK-NEXT: addr
+# CHECK-NEXT: size
+# CHECK-NEXT: offset [[#%,TEXT:]]
+
+# CHECK-LABEL: cmd LC_DATA_IN_CODE
+# CHECK-NEXT: cmdsize 16
+# CHECK-NEXT: dataoff
+# CHECK-NEXT: datasize 16
+
+# CHECK-LABEL: Data in code table (2 entries)
+# CHECK-NEXT: offset length kind
+# CHECK-NEXT: [[#%x,TEXT + 28]] 24 JUMP_TABLE32
+# CHECK-NEXT: [[#%x,TEXT + 68]] 12 JUMP_TABLE32
+
+#--- foo.s
+.text
+.globl _main
+.p2align 4, 0x90
+_main:
+pushq %rbp
+movq %rsp, %rbp
+subq $16, %rsp
+movl $0, -4(%rbp)
+movb $0, %al
+callq _bar
+addq $16, %rsp
+popq %rbp
+retq
+.p2align 2, 0x90
+.data_region jt32
+.long 0
+.long 0
+.long 0
+.long 0
+.long 0
+.long 0
+.end_data_region
+
+#--- bar.s
+.text
+.globl _bar
+.p2align 4
+_bar:
+retq
+.p2align 2, 0x90
+.data_region jt32
+.long 0
+.long 0
+.long 0
+.end_data_region
diff --git a/lld/test/MachO/headerpad.s b/lld/test/MachO/headerpad.s
index 3f7f201ba7d8e..fa59a9f79d8a7 100644
--- a/lld/test/MachO/headerpad.s
+++ b/lld/test/MachO/headerpad.s
@@ -72,7 +72,7 @@
# PADMAX-NEXT: segname __TEXT
# PADMAX-NEXT: addr
# PADMAX-NEXT: size
-# PADMAX-NEXT: offset [[#%u, CMDSIZE + 0x20 + mul(0x400, N - 8)]]
+# PADMAX-NEXT: offset [[#%u, CMDSIZE + 0x20 + mul(0x400, N - 9)]]
################ All 3 kinds of LCDylib swamped by a larger override
# RUN: %lld -o %t/libnull.dylib %t/null.o -dylib \
diff --git a/lld/test/MachO/local-got.s b/lld/test/MachO/local-got.s
index f79fef40a7eef..1d415e35e91af 100644
--- a/lld/test/MachO/local-got.s
+++ b/lld/test/MachO/local-got.s
@@ -15,12 +15,12 @@
## address offset and the contents at that address very similarly, so am using
## --match-full-lines to make sure we match on the right thing.
# CHECK: Contents of section __TEXT,__cstring:
-# CHECK-NEXT: 100000434 {{.*}}
+# CHECK-NEXT: 100000444 {{.*}}
## 1st 8 bytes refer to the start of __cstring + 0xe, 2nd 8 bytes refer to the
## start of __cstring
# CHECK: Contents of section __DATA_CONST,__got:
-# CHECK-NEXT: [[#%X,ADDR:]] 42040000 01000000 34040000 01000000 {{.*}}
+# CHECK-NEXT: [[#%X,ADDR:]] 52040000 01000000 44040000 01000000 {{.*}}
# CHECK-NEXT: [[#ADDR + 16]] 00000000 00000000 {{.*}}
## Check that the rebase table is empty.
More information about the llvm-commits mailing list